Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386
264 files changed, 80767 insertions, 0 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
new file mode 100644
index 00000000000..17a0cbce6f3
--- /dev/null
+++ b/arch/i386/Kconfig
@@ -0,0 +1,1269 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config X86
+	bool
+	default y
+	help
+	  This is Linux's home port.  Linux was originally native to the Intel
+	  386, and runs on all the later x86 processors including the Intel
+	  486, 586, Pentiums, and various instruction-set-compatible chips by
+	  AMD, Cyrix, and others.
+
+config MMU
+	bool
+	default y
+
+config SBUS
+	bool
+
+config UID16
+	bool
+	default y
+
+config GENERIC_ISA_DMA
+	bool
+	default y
+
+config GENERIC_IOMAP
+	bool
+	default y
+
+source "init/Kconfig"
+
+menu "Processor type and features"
+
+choice
+	prompt "Subarchitecture Type"
+	default X86_PC
+
+config X86_PC
+	bool "PC-compatible"
+	help
+	  Choose this option if your computer is a standard PC or compatible.
+
+config X86_ELAN
+	bool "AMD Elan"
+	help
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
+	  If unsure, choose "PC-compatible" instead.
+
+config X86_VOYAGER
+	bool "Voyager (NCR)"
+	help
+	  Voyager is an MCA-based 32-way capable SMP architecture proprietary
+	  to NCR Corp.  Machine classes 345x/35xx/4100/51xx are Voyager-based.
+
+	  *** WARNING ***
+
+	  If you do not specifically know you have a Voyager based machine,
+	  say N here, otherwise the kernel you build will not be bootable.
+
+config X86_NUMAQ
+	bool "NUMAQ (IBM/Sequent)"
+	select DISCONTIGMEM
+	select NUMA
+	help
+	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
+	  multiquad box. This changes the way that processors are bootstrapped,
+	  and uses Clustered Logical APIC addressing mode instead of Flat Logical.
+	  You will need a new lynxer.elf file to flash your firmware with - send
+	  email to <Martin.Bligh@us.ibm.com>.
+
+config X86_SUMMIT
+	bool "Summit/EXA (IBM x440)"
+	depends on SMP
+	help
+	  This option is needed for IBM systems that use the Summit/EXA chipset.
+	  In particular, it is needed for the x440.
+
+	  If you don't have one of these computers, you should say N here.
+
+config X86_BIGSMP
+	bool "Support for other sub-arch SMP systems with more than 8 CPUs"
+	depends on SMP
+	help
+	  This option is needed for the systems that have more than 8 CPUs
+	  and if the system is not of any sub-arch type above.
+
+	  If you don't have such a system, you should say N here.
+
+config X86_VISWS
+	bool "SGI 320/540 (Visual Workstation)"
+	help
+	  The SGI Visual Workstation series is an IA32-based workstation
+	  based on SGI systems chips with some legacy PC hardware attached.
+
+	  Say Y here to create a kernel to run on the SGI 320 or 540.
+
+	  A kernel compiled for the Visual Workstation will not run on PCs
+	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.
+
+config X86_GENERICARCH
+       bool "Generic architecture (Summit, bigsmp, ES7000, default)"
+       depends on SMP
+       help
+          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
+	  It is intended for a generic binary kernel.
+
+config X86_ES7000
+	bool "Support for Unisys ES7000 IA32 series"
+	depends on SMP
+	help
+	  Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
+	  supposed to run on an IA32-based Unisys ES7000 system.
+	  Only choose this option if you have such a system, otherwise you
+	  should say N here.
+
+endchoice
+
+config ACPI_SRAT
+	bool
+	default y
+	depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
+
+config X86_SUMMIT_NUMA
+	bool
+	default y
+	depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
+
+config X86_CYCLONE_TIMER
+	bool
+	default y
+	depends on X86_SUMMIT || X86_GENERICARCH
+
+config ES7000_CLUSTERED_APIC
+	bool
+	default y
+	depends on SMP && X86_ES7000 && MPENTIUMIII
+
+if !X86_ELAN
+
+choice
+	prompt "Processor family"
+	default M686
+
+config M386
+	bool "386"
+	---help---
+	  This is the processor type of your CPU. This information is used for
+	  optimizing purposes. In order to compile a kernel that can run on
+	  all x86 CPU types (albeit not optimally fast), you can specify
+	  "386" here.
+
+	  The kernel will not necessarily run on earlier architectures than
+	  the one you have chosen, e.g. a Pentium optimized kernel will run on
+	  a PPro, but not necessarily on a i486.
+
+	  Here are the settings recommended for greatest speed:
+	  - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
+	  486DLC/DLC2, UMC 486SX-S and NexGen Nx586.  Only "386" kernels
+	  will run on a 386 class machine.
+	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
+	  SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+	  - "586" for generic Pentium CPUs lacking the TSC
+	  (time stamp counter) register.
+	  - "Pentium-Classic" for the Intel Pentium.
+	  - "Pentium-MMX" for the Intel Pentium MMX.
+	  - "Pentium-Pro" for the Intel Pentium Pro.
+	  - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
+	  - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
+	  - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
+	  - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
+	  - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+	  - "Crusoe" for the Transmeta Crusoe series.
+	  - "Efficeon" for the Transmeta Efficeon series.
+	  - "Winchip-C6" for original IDT Winchip.
+	  - "Winchip-2" for IDT Winchip 2.
+	  - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+	  - "MediaGX/Geode" for Cyrix MediaGX aka Geode.
+	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
+	  - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
+
+	  If you don't know what to do, choose "386".
+
+config M486
+	bool "486"
+	help
+	  Select this for a 486 series processor, either Intel or one of the
+	  compatible processors from AMD, Cyrix, IBM, or Intel.  Includes DX,
+	  DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
+	  U5S.
+
+config M586
+	bool "586/K5/5x86/6x86/6x86MX"
+	help
+	  Select this for an 586 or 686 series processor such as the AMD K5,
+	  the Cyrix 5x86, 6x86 and 6x86MX.  This choice does not
+	  assume the RDTSC (Read Time Stamp Counter) instruction.
+
+config M586TSC
+	bool "Pentium-Classic"
+	help
+	  Select this for a Pentium Classic processor with the RDTSC (Read
+	  Time Stamp Counter) instruction for benchmarking.
+
+config M586MMX
+	bool "Pentium-MMX"
+	help
+	  Select this for a Pentium with the MMX graphics/multimedia
+	  extended instructions.
+
+config M686
+	bool "Pentium-Pro"
+	help
+	  Select this for Intel Pentium Pro chips.  This enables the use of
+	  Pentium Pro extended instructions, and disables the init-time guard
+	  against the f00f bug found in earlier Pentiums.
+
+config MPENTIUMII
+	bool "Pentium-II/Celeron(pre-Coppermine)"
+	help
+	  Select this for Intel chips based on the Pentium-II and
+	  pre-Coppermine Celeron core.  This option enables an unaligned
+	  copy optimization, compiles the kernel with optimization flags
+	  tailored for the chip, and applies any applicable Pentium Pro
+	  optimizations.
+
+config MPENTIUMIII
+	bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
+	help
+	  Select this for Intel chips based on the Pentium-III and
+	  Celeron-Coppermine core.  This option enables use of some
+	  extended prefetch instructions in addition to the Pentium II
+	  extensions.
+
+config MPENTIUMM
+	bool "Pentium M"
+	help
+	  Select this for Intel Pentium M (not Pentium-4 M)
+	  notebook chips.
+
+config MPENTIUM4
+	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
+	help
+	  Select this for Intel Pentium 4 chips.  This includes the
+	  Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
+	  (not Pentium M) chips.  This option enables compile flags
+	  optimized for the chip, uses the correct cache shift, and
+	  applies any applicable Pentium III optimizations.
+
+config MK6
+	bool "K6/K6-II/K6-III"
+	help
+	  Select this for an AMD K6-family processor.  Enables use of
+	  some extended instructions, and passes appropriate optimization
+	  flags to GCC.
+
+config MK7
+	bool "Athlon/Duron/K7"
+	help
+	  Select this for an AMD Athlon K7-family processor.  Enables use of
+	  some extended instructions, and passes appropriate optimization
+	  flags to GCC.
+
+config MK8
+	bool "Opteron/Athlon64/Hammer/K8"
+	help
+	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.  Enables
+	  use of some extended instructions, and passes appropriate optimization
+	  flags to GCC.
+
+config MCRUSOE
+	bool "Crusoe"
+	help
+	  Select this for a Transmeta Crusoe processor.  Treats the processor
+	  like a 586 with TSC, and sets some GCC optimization flags (like a
+	  Pentium Pro with no alignment requirements).
+
+config MEFFICEON
+	bool "Efficeon"
+	help
+	  Select this for a Transmeta Efficeon processor.
+
+config MWINCHIPC6
+	bool "Winchip-C6"
+	help
+	  Select this for an IDT Winchip C6 chip.  Linux and GCC
+	  treat this chip as a 586TSC with some extended instructions
+	  and alignment requirements.
+
+config MWINCHIP2
+	bool "Winchip-2"
+	help
+	  Select this for an IDT Winchip-2.  Linux and GCC
+	  treat this chip as a 586TSC with some extended instructions
+	  and alignment requirements.
+
+config MWINCHIP3D
+	bool "Winchip-2A/Winchip-3"
+	help
+	  Select this for an IDT Winchip-2A or 3.  Linux and GCC
+	  treat this chip as a 586TSC with some extended instructions
+	  and alignment reqirements.  Also enable out of order memory
+	  stores for this CPU, which can increase performance of some
+	  operations.
+
+config MGEODE
+	bool "MediaGX/Geode"
+	help
+	  Select this for a Cyrix MediaGX aka Geode chip. Linux and GCC
+          treat this chip as a 586TSC with some extended instructions
+          and alignment reqirements.
+
+config MCYRIXIII
+	bool "CyrixIII/VIA-C3"
+	help
+	  Select this for a Cyrix III or C3 chip.  Presently Linux and GCC
+	  treat this chip as a generic 586. Whilst the CPU is 686 class,
+	  it lacks the cmov extension which gcc assumes is present when
+	  generating 686 code.
+	  Note that Nehemiah (Model 9) and above will not boot with this
+	  kernel due to them lacking the 3DNow! instructions used in earlier
+	  incarnations of the CPU.
+
+config MVIAC3_2
+	bool "VIA C3-2 (Nehemiah)"
+	help
+	  Select this for a VIA C3 "Nehemiah". Selecting this enables usage
+	  of SSE and tells gcc to treat the CPU as a 686.
+	  Note, this kernel will not boot on older (pre model 9) C3s.
+
+endchoice
+
+config X86_GENERIC
+       bool "Generic x86 support"
+       help
+	  Instead of just including optimizations for the selected
+	  x86 variant (e.g. PII, Crusoe or Athlon), include some more
+	  generic optimizations as well. This will make the kernel
+	  perform better on x86 CPUs other than that selected.
+
+	  This is really intended for distributors who need more
+	  generic optimizations.
+
+endif
+
+#
+# Define implied options from the CPU selection here
+#
+config X86_CMPXCHG
+	bool
+	depends on !M386
+	default y
+
+config X86_XADD
+	bool
+	depends on !M386
+	default y
+
+config X86_L1_CACHE_SHIFT
+	int
+	default "7" if MPENTIUM4 || X86_GENERIC
+	default "4" if X86_ELAN || M486 || M386
+	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE
+	default "6" if MK7 || MK8 || MPENTIUMM
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+	depends on M386
+	default y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+	depends on !M386
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config X86_PPRO_FENCE
+	bool
+	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODE
+	default y
+
+config X86_F00F_BUG
+	bool
+	depends on M586MMX || M586TSC || M586 || M486 || M386
+	default y
+
+config X86_WP_WORKS_OK
+	bool
+	depends on !M386
+	default y
+
+config X86_INVLPG
+	bool
+	depends on !M386
+	default y
+
+config X86_BSWAP
+	bool
+	depends on !M386
+	default y
+
+config X86_POPAD_OK
+	bool
+	depends on !M386
+	default y
+
+config X86_ALIGNMENT_16
+	bool
+	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODE
+	default y
+
+config X86_GOOD_APIC
+	bool
+	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
+	default y
+
+config X86_INTEL_USERCOPY
+	bool
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
+	default y
+
+config X86_USE_PPRO_CHECKSUM
+	bool
+	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
+	default y
+
+config X86_USE_3DNOW
+	bool
+	depends on MCYRIXIII || MK7
+	default y
+
+config X86_OOSTORE
+	bool
+	depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MGEODE) && MTRR
+	default y
+
+config HPET_TIMER
+	bool "HPET Timer Support"
+	help
+	  This enables the use of the HPET for the kernel's internal timer.
+	  HPET is the next generation timer replacing legacy 8254s.
+	  You can safely choose Y here.  However, HPET will only be
+	  activated if the platform and the BIOS support this feature.
+	  Otherwise the 8254 will be used for timing services.
+
+	  Choose N to continue using the legacy 8254 timer.
+
+config HPET_EMULATE_RTC
+	bool "Provide RTC interrupt"
+	depends on HPET_TIMER && RTC=y
+
+config SMP
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, like most personal computers, say N. If
+	  you have a system with more than one CPU, say Y.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on many, but not all,
+	  singleprocessor machines. On a singleprocessor machine, the kernel
+	  will run faster if you say N here.
+
+	  Note that if you say Y here and choose architecture "586" or
+	  "Pentium" under "Processor family", the kernel will not work on 486
+	  architectures. Similarly, multiprocessor kernels for the "PPro"
+	  architecture may not work on all Pentium based boards.
+
+	  People using multiprocessor machines who say Y here should also say
+	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+	  Management" code will be disabled if you say Y here.
+
+	  See also the <file:Documentation/smp.txt>,
+	  <file:Documentation/i386/IO-APIC.txt>,
+	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-255)"
+	range 2 255
+	depends on SMP
+	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
+	default "8"
+	help
+	  This allows you to specify the maximum number of CPUs which this
+	  kernel will support.  The maximum supported value is 255 and the
+	  minimum value which makes sense is 2.
+
+	  This is purely to save memory - each supported CPU adds
+	  approximately eight kilobytes to the kernel image.
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SMP
+	default off
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  cost of slightly increased overhead in some places. If unsure say
+	  N here.
+
+config PREEMPT
+	bool "Preemptible Kernel"
+	help
+	  This option reduces the latency of the kernel when reacting to
+	  real-time or interactive events by allowing a low priority process to
+	  be preempted even if it is in kernel mode executing a system call.
+	  This allows applications to run more reliably even when the system is
+	  under load.
+
+	  Say Y here if you are building a kernel for a desktop, embedded
+	  or real-time system.  Say N if you are unsure.
+
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
+config X86_UP_APIC
+	bool "Local APIC support on uniprocessors"
+	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+	help
+	  A local APIC (Advanced Programmable Interrupt Controller) is an
+	  integrated interrupt controller in the CPU. If you have a single-CPU
+	  system which has a processor with a local APIC, you can say Y here to
+	  enable and use it. If you say Y here even though your machine doesn't
+	  have a local APIC, then the kernel will still run with no slowdown at
+	  all. The local APIC supports CPU-generated self-interrupts (timer,
+	  performance counters), and the NMI watchdog which detects hard
+	  lockups.
+
+config X86_UP_IOAPIC
+	bool "IO-APIC support on uniprocessors"
+	depends on X86_UP_APIC
+	help
+	  An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
+	  SMP-capable replacement for PC-style interrupt controllers. Most
+	  SMP systems and many recent uniprocessor systems have one.
+
+	  If you have a single-CPU system with an IO-APIC, you can say Y here
+	  to use it. If you say Y here even though your machine doesn't have
+	  an IO-APIC, then the kernel will still run with no slowdown at all.
+
+config X86_LOCAL_APIC
+	bool
+	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+	default y
+
+config X86_IO_APIC
+	bool
+	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+	default y
+
+config X86_VISWS_APIC
+	bool
+	depends on X86_VISWS
+	default y
+
+config X86_TSC
+	bool
+	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODE) && !X86_NUMAQ
+	default y
+
+config X86_MCE
+	bool "Machine Check Exception"
+	depends on !X86_VOYAGER
+	---help---
+	  Machine Check Exception support allows the processor to notify the
+	  kernel if it detects a problem (e.g. overheating, component failure).
+	  The action the kernel takes depends on the severity of the problem,
+	  ranging from a warning message on the console, to halting the machine.
+	  Your processor must be a Pentium or newer to support this - check the
+	  flags in /proc/cpuinfo for mce.  Note that some older Pentium systems
+	  have a design flaw which leads to false MCE events - hence MCE is
+	  disabled on all P5 processors, unless explicitly enabled with "mce"
+	  as a boot argument.  Similarly, if MCE is built in and creates a
+	  problem on some new non-standard machine, you can boot with "nomce"
+	  to disable it.  MCE support simply ignores non-MCE processors like
+	  the 386 and 486, so nearly everyone can say Y here.
+
+config X86_MCE_NONFATAL
+	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
+	depends on X86_MCE
+	help
+	  Enabling this feature starts a timer that triggers every 5 seconds which
+	  will look at the machine check registers to see if anything happened.
+	  Non-fatal problems automatically get corrected (but still logged).
+	  Disable this if you don't want to see these messages.
+	  Seeing the messages this option prints out may be indicative of dying hardware,
+	  or out-of-spec (ie, overclocked) hardware.
+	  This option only does something on certain CPUs.
+	  (AMD Athlon/Duron and Intel Pentium 4)
+
+config X86_MCE_P4THERMAL
+	bool "check for P4 thermal throttling interrupt."
+	depends on X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
+	help
+	  Enabling this feature will cause a message to be printed when the P4
+	  enters thermal throttling.
+
+config TOSHIBA
+	tristate "Toshiba Laptop support"
+	---help---
+	  This adds a driver to safely access the System Management Mode of
+	  the CPU on Toshiba portables with a genuine Toshiba BIOS. It does
+	  not work on models with a Phoenix BIOS. The System Management Mode
+	  is used to set the BIOS and power saving options on Toshiba portables.
+
+	  For information on utilities to make use of this driver see the
+	  Toshiba Linux utilities web site at:
+	  <http://www.buzzard.org.uk/toshiba/>.
+
+	  Say Y if you intend to run this kernel on a Toshiba portable.
+	  Say N otherwise.
+
+config I8K
+	tristate "Dell laptop support"
+	---help---
+	  This adds a driver to safely access the System Management Mode
+	  of the CPU on the Dell Inspiron 8000. The System Management Mode
+	  is used to read cpu temperature and cooling fan status and to
+	  control the fans on the I8K portables.
+
+	  This driver has been tested only on the Inspiron 8000 but it may
+	  also work with other Dell laptops. You can force loading on other
+	  models by passing the parameter `force=1' to the module. Use at
+	  your own risk.
+
+	  For information on utilities to make use of this driver see the
+	  I8K Linux utilities web site at:
+	  <http://people.debian.org/~dz/i8k/>
+
+	  Say Y if you intend to run this kernel on a Dell Inspiron 8000.
+	  Say N otherwise.
+
+config MICROCODE
+	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+	---help---
+	  If you say Y here and also to "/dev file system support" in the
+	  'File systems' section, you will be able to update the microcode on
+	  Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
+	  Pentium III, Pentium 4, Xeon etc.  You will obviously need the
+	  actual microcode binary data itself which is not shipped with the
+	  Linux kernel.
+
+	  For latest news and information on obtaining all the required
+	  ingredients for this driver, check:
+	  <http://www.urbanmyth.org/microcode/>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called microcode.
+
+config X86_MSR
+	tristate "/dev/cpu/*/msr - Model-specific register support"
+	help
+	  This device gives privileged processes access to the x86
+	  Model-Specific Registers (MSRs).  It is a character device with
+	  major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+	  MSR accesses are directed to a specific CPU on multi-processor
+	  systems.
+
+config X86_CPUID
+	tristate "/dev/cpu/*/cpuid - CPU information support"
+	help
+	  This device gives processes access to the x86 CPUID instruction to
+	  be executed on a specific processor.  It is a character device
+	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
+	  /dev/cpu/31/cpuid.
+
+source "drivers/firmware/Kconfig"
+
+choice
+	prompt "High Memory Support"
+	default NOHIGHMEM
+
+config NOHIGHMEM
+	bool "off"
+	---help---
+	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+	  However, the address space of 32-bit x86 processors is only 4
+	  Gigabytes large. That means that, if you have a large amount of
+	  physical memory, not all of it can be "permanently mapped" by the
+	  kernel. The physical memory that's not permanently mapped is called
+	  "high memory".
+
+	  If you are compiling a kernel which will never run on a machine with
+	  more than 1 Gigabyte total physical RAM, answer "off" here (default
+	  choice and suitable for most users). This will result in a "3GB/1GB"
+	  split: 3GB are mapped so that each process sees a 3GB virtual memory
+	  space and the remaining part of the 4GB virtual memory space is used
+	  by the kernel to permanently map as much physical memory as
+	  possible.
+
+	  If the machine has between 1 and 4 Gigabytes physical RAM, then
+	  answer "4GB" here.
+
+	  If more than 4 Gigabytes is used then answer "64GB" here. This
+	  selection turns Intel PAE (Physical Address Extension) mode on.
+	  PAE implements 3-level paging on IA32 processors. PAE is fully
+	  supported by Linux, PAE mode is implemented on all recent Intel
+	  processors (Pentium Pro and better). NOTE: If you say "64GB" here,
+	  then the kernel will not boot on CPUs that don't support PAE!
+
+	  The actual amount of total physical memory will either be
+	  auto detected or can be forced by using a kernel command line option
+	  such as "mem=256M". (Try "man bootparam" or see the documentation of
+	  your boot loader (lilo or loadlin) about how to pass options to the
+	  kernel at boot time.)
+
+	  If unsure, say "off".
+
+config HIGHMEM4G
+	bool "4GB"
+	help
+	  Select this if you have a 32-bit processor and between 1 and 4
+	  gigabytes of physical RAM.
+
+config HIGHMEM64G
+	bool "64GB"
+	help
+	  Select this if you have a 32-bit processor and more than 4
+	  gigabytes of physical RAM.
+
+endchoice
+
+config HIGHMEM
+	bool
+	depends on HIGHMEM64G || HIGHMEM4G
+	default y
+
+config X86_PAE
+	bool
+	depends on HIGHMEM64G
+	default y
+
+# Common NUMA Features
+config NUMA
+	bool "Numa Memory Allocation and Scheduler Support"
+	depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+	default n if X86_PC
+	default y if (X86_NUMAQ || X86_SUMMIT)
+
+# Need comments to help the hapless user trying to turn on NUMA support
+comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
+	depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
+
+comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
+	depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
+
+config DISCONTIGMEM
+	bool
+	depends on NUMA
+	default y
+
+config HAVE_ARCH_BOOTMEM_NODE
+	bool
+	depends on NUMA
+	default y
+
+config HAVE_MEMORY_PRESENT
+	bool
+	depends on DISCONTIGMEM
+	default y
+
+config NEED_NODE_MEMMAP_SIZE
+	bool
+	depends on DISCONTIGMEM
+	default y
+
+config HIGHPTE
+	bool "Allocate 3rd-level pagetables from highmem"
+	depends on HIGHMEM4G || HIGHMEM64G
+	help
+	  The VM uses one page table entry for each page of physical memory.
+	  For systems with a lot of RAM, this can be wasteful of precious
+	  low memory.  Setting this option will put user-space page table
+	  entries in high memory.
+
+config MATH_EMULATION
+	bool "Math emulation"
+	---help---
+	  Linux can emulate a math coprocessor (used for floating point
+	  operations) if you don't have one. 486DX and Pentium processors have
+	  a math coprocessor built in, 486SX and 386 do not, unless you added
+	  a 487DX or 387, respectively. (The messages during boot time can
+	  give you some hints here ["man dmesg"].) Everyone needs either a
+	  coprocessor or this emulation.
+
+	  If you don't have a math coprocessor, you need to say Y here; if you
+	  say Y here even though you have a coprocessor, the coprocessor will
+	  be used nevertheless. (This behavior can be changed with the kernel
+	  command line option "no387", which comes handy if your coprocessor
+	  is broken. Try "man bootparam" or see the documentation of your boot
+	  loader (lilo or loadlin) about how to pass options to the kernel at
+	  boot time.) This means that it is a good idea to say Y here if you
+	  intend to use this kernel on different machines.
+
+	  More information about the internals of the Linux math coprocessor
+	  emulation can be found in <file:arch/i386/math-emu/README>.
+
+	  If you are not sure, say Y; apart from resulting in a 66 KB bigger
+	  kernel, it won't hurt.
+
+config MTRR
+	bool "MTRR (Memory Type Range Register) support"
+	---help---
+	  On Intel P6 family processors (Pentium Pro, Pentium II and later)
+	  the Memory Type Range Registers (MTRRs) may be used to control
+	  processor access to memory ranges. This is most useful if you have
+	  a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+	  allows bus write transfers to be combined into a larger transfer
+	  before bursting over the PCI/AGP bus. This can increase performance
+	  of image write operations 2.5 times or more. Saying Y here creates a
+	  /proc/mtrr file which may be used to manipulate your processor's
+	  MTRRs. Typically the X server should use this.
+
+	  This code has a reasonably generic interface so that similar
+	  control registers on other processors can be easily supported
+	  as well:
+
+	  The Cyrix 6x86, 6x86MX and M II processors have Address Range
+	  Registers (ARRs) which provide a similar functionality to MTRRs. For
+	  these, the ARRs are used to emulate the MTRRs.
+	  The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+	  MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
+	  write-combining. All of these processors are supported by this code
+	  and it makes sense to say Y here if you have one of them.
+
+	  Saying Y here also fixes a problem with buggy SMP BIOSes which only
+	  set the MTRRs for the boot CPU and not for the secondary CPUs. This
+	  can lead to all sorts of problems, so it's good to say Y here.
+
+	  You can safely say Y even if your machine doesn't have MTRRs, you'll
+	  just add about 9 KB to your kernel.
+
+	  See <file:Documentation/mtrr.txt> for more information.
+
+config EFI
+	bool "Boot from EFI support (EXPERIMENTAL)"
+	depends on ACPI
+	default n
+	---help---
+	This enables the the kernel to boot on EFI platforms using
+	system configuration information passed to it from the firmware.
+	This also enables the kernel to use any EFI runtime services that are
+	available (such as the EFI variable services).
+
+	This option is only useful on systems that have EFI firmware
+	and will result in a kernel image that is ~8k larger.  In addition,
+	you must use the latest ELILO loader available at
+	<http://elilo.sourceforge.net> in order to take advantage of
+	kernel initialization using EFI information (neither GRUB nor LILO know
+	anything about EFI).  However, even with this option, the resultant
+	kernel should continue to boot on existing non-EFI platforms.
+
+config IRQBALANCE
+ 	bool "Enable kernel irq balancing"
+	depends on SMP && X86_IO_APIC
+	default y
+	help
+ 	  The default yes will allow the kernel to do irq load balancing.
+	  Saying no will keep the kernel from doing irq load balancing.
+
+config HAVE_DEC_LOCK
+	bool
+	depends on (SMP || PREEMPT) && X86_CMPXCHG
+	default y
+
+# turning this on wastes a bunch of space.
+# Summit needs it only when NUMA is on
+config BOOT_IOREMAP
+	bool
+	depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
+	default y
+
+config REGPARM
+	bool "Use register arguments (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	default n
+	help
+	Compile the kernel with -mregparm=3. This uses a different ABI
+	and passes the first three arguments of a function call in registers.
+	This will probably break binary only modules.
+
+	This feature is only enabled for gcc-3.0 and later - earlier compilers
+	generate incorrect output with certain kernel constructs when
+	-mregparm=3 is used.
+
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	depends on PROC_FS
+	default y
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y. Only embedded should say N here.
+
+endmenu
+
+
+menu "Power management options (ACPI, APM)"
+	depends on !X86_VOYAGER
+
+source kernel/power/Kconfig
+
+source "drivers/acpi/Kconfig"
+
+menu "APM (Advanced Power Management) BIOS Support"
+depends on PM && !X86_VISWS
+
+config APM
+	tristate "APM (Advanced Power Management) BIOS support"
+	depends on PM
+	---help---
+	  APM is a BIOS specification for saving power using several different
+	  techniques. This is mostly useful for battery powered laptops with
+	  APM compliant BIOSes. If you say Y here, the system time will be
+	  reset after a RESUME operation, the /proc/apm device will provide
+	  battery status information, and user-space programs will receive
+	  notification of APM "events" (e.g. battery status change).
+
+	  If you select "Y" here, you can disable actual use of the APM
+	  BIOS by passing the "apm=off" option to the kernel at boot time.
+
+	  Note that the APM support is almost completely disabled for
+	  machines with more than one CPU.
+
+	  In order to use APM, you will need supporting software. For location
+	  and more information, read <file:Documentation/pm.txt> and the
+	  Battery Powered Linux mini-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  This driver does not spin down disk drives (see the hdparm(8)
+	  manpage ("man 8 hdparm") for that), and it doesn't turn off
+	  VESA-compliant "green" monitors.
+
+	  This driver does not support the TI 4000M TravelMate and the ACER
+	  486/DX4/75 because they don't have compliant BIOSes. Many "green"
+	  desktop machines also don't have compliant BIOSes, and this driver
+	  may cause those machines to panic during the boot phase.
+
+	  Generally, if you don't have a battery in your machine, there isn't
+	  much point in using this driver and you should say N. If you get
+	  random kernel OOPSes or reboots that don't seem to be related to
+	  anything, try disabling/enabling this option (or disabling/enabling
+	  APM in your BIOS).
+
+	  Some other things you should try when experiencing seemingly random,
+	  "weird" problems:
+
+	  1) make sure that you have enough swap space and that it is
+	  enabled.
+	  2) pass the "no-hlt" option to the kernel
+	  3) switch on floating point emulation in the kernel and pass
+	  the "no387" option to the kernel
+	  4) pass the "floppy=nodma" option to the kernel
+	  5) pass the "mem=4M" option to the kernel (thereby disabling
+	  all but the first 4 MB of RAM)
+	  6) make sure that the CPU is not over clocked.
+	  7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
+	  8) disable the cache from your BIOS settings
+	  9) install a fan for the video card or exchange video RAM
+	  10) install a better fan for the CPU
+	  11) exchange RAM chips
+	  12) exchange the motherboard.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called apm.
+
+config APM_IGNORE_USER_SUSPEND
+	bool "Ignore USER SUSPEND"
+	depends on APM
+	help
+	  This option will ignore USER SUSPEND requests. On machines with a
+	  compliant APM BIOS, you want to say N. However, on the NEC Versa M
+	  series notebooks, it is necessary to say Y because of a BIOS bug.
+
+config APM_DO_ENABLE
+	bool "Enable PM at boot time"
+	depends on APM
+	---help---
+	  Enable APM features at boot time. From page 36 of the APM BIOS
+	  specification: "When disabled, the APM BIOS does not automatically
+	  power manage devices, enter the Standby State, enter the Suspend
+	  State, or take power saving steps in response to CPU Idle calls."
+	  This driver will make CPU Idle calls when Linux is idle (unless this
+	  feature is turned off -- see "Do CPU IDLE calls", below). This
+	  should always save battery power, but more complicated APM features
+	  will be dependent on your BIOS implementation. You may need to turn
+	  this option off if your computer hangs at boot time when using APM
+	  support, or if it beeps continuously instead of suspending. Turn
+	  this off if you have a NEC UltraLite Versa 33/C or a Toshiba
+	  T400CDT. This is off by default since most machines do fine without
+	  this feature.
+
+config APM_CPU_IDLE
+	bool "Make CPU Idle calls when idle"
+	depends on APM
+	help
+	  Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
+	  On some machines, this can activate improved power savings, such as
+	  a slowed CPU clock rate, when the machine is idle. These idle calls
+	  are made after the idle loop has run for some length of time (e.g.,
+	  333 mS). On some machines, this will cause a hang at boot time or
+	  whenever the CPU becomes idle. (On machines with more than one CPU,
+	  this option does nothing.)
+
+config APM_DISPLAY_BLANK
+	bool "Enable console blanking using APM"
+	depends on APM
+	help
+	  Enable console blanking using the APM. Some laptops can use this to
+	  turn off the LCD backlight when the screen blanker of the Linux
+	  virtual console blanks the screen. Note that this is only used by
+	  the virtual console screen blanker, and won't turn off the backlight
+	  when using the X Window system. This also doesn't have anything to
+	  do with your VESA-compliant power-saving monitor. Further, this
+	  option doesn't work for all laptops -- it might not turn off your
+	  backlight at all, or it might print a lot of errors to the console,
+	  especially if you are using gpm.
+
+config APM_RTC_IS_GMT
+	bool "RTC stores time in GMT"
+	depends on APM
+	help
+	  Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
+	  stores the time in GMT (Greenwich Mean Time). Say N if your RTC
+	  stores localtime.
+
+	  It is in fact recommended to store GMT in your RTC, because then you
+	  don't have to worry about daylight savings time changes. The only
+	  reason not to use GMT in your RTC is if you also run a broken OS
+	  that doesn't understand GMT.
+
+config APM_ALLOW_INTS
+	bool "Allow interrupts during APM BIOS calls"
+	depends on APM
+	help
+	  Normally we disable external interrupts while we are making calls to
+	  the APM BIOS as a measure to lessen the effects of a badly behaving
+	  BIOS implementation.  The BIOS should reenable interrupts if it
+	  needs to.  Unfortunately, some BIOSes do not -- especially those in
+	  many of the newer IBM Thinkpads.  If you experience hangs when you
+	  suspend, try setting this to Y.  Otherwise, say N.
+
+config APM_REAL_MODE_POWER_OFF
+	bool "Use real mode APM BIOS call to power off"
+	depends on APM
+	help
+	  Use real mode APM BIOS calls to switch off the computer. This is
+	  a work-around for a number of buggy BIOSes. Switch this option on if
+	  your computer crashes instead of powering off properly.
+
+endmenu
+
+source "arch/i386/kernel/cpu/cpufreq/Kconfig"
+
+endmenu
+
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+
+config PCI
+	bool "PCI support" if !X86_VISWS
+	depends on !X86_VOYAGER
+	default y if X86_VISWS
+	help
+	  Find out whether you have a PCI motherboard. PCI is the name of a
+	  bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+	  VESA. If you have PCI, say Y, otherwise N.
+
+	  The PCI-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>, contains valuable
+	  information about which PCI hardware does work under Linux and which
+	  doesn't.
+
+choice
+	prompt "PCI access mode"
+	depends on PCI && !X86_VISWS
+	default PCI_GOANY
+	---help---
+	  On PCI systems, the BIOS can be used to detect the PCI devices and
+	  determine their configuration. However, some old PCI motherboards
+	  have BIOS bugs and may crash if this is done. Also, some embedded
+	  PCI-based systems don't have any BIOS at all. Linux can also try to
+	  detect the PCI hardware directly without using the BIOS.
+
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method and falls back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOBIOS
+	bool "BIOS"
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
+
+config PCI_GODIRECT
+	bool "Direct"
+
+config PCI_GOANY
+	bool "Any"
+
+endchoice
+
+config PCI_BIOS
+	bool
+	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+	default y
+
+config PCI_DIRECT
+	bool
+ 	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
+	default y
+
+config PCI_MMCONFIG
+	bool
+	depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
+	select ACPI_BOOT
+	default y
+
+source "drivers/pci/pcie/Kconfig"
+
+source "drivers/pci/Kconfig"
+
+config ISA
+	bool "ISA support"
+	depends on !(X86_VOYAGER || X86_VISWS)
+	help
+	  Find out whether you have ISA slots on your motherboard.  ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
+	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
+	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
+
+config EISA
+	bool "EISA support"
+	depends on ISA
+	---help---
+	  The Extended Industry Standard Architecture (EISA) bus was
+	  developed as an open alternative to the IBM MicroChannel bus.
+
+	  The EISA bus provided some of the features of the IBM MicroChannel
+	  bus while maintaining backward compatibility with cards made for
+	  the older ISA bus.  The EISA bus saw limited use between 1988 and
+	  1995 when it was made obsolete by the PCI bus.
+
+	  Say Y here if you are building a kernel for an EISA-based machine.
+
+	  Otherwise, say N.
+
+source "drivers/eisa/Kconfig"
+
+config MCA
+	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+	default y if X86_VOYAGER
+	help
+	  MicroChannel Architecture is found in some IBM PS/2 machines and
+	  laptops.  It is a bus system similar to PCI or ISA. See
+	  <file:Documentation/mca.txt> (and especially the web page given
+	  there) before attempting to build an MCA bus kernel.
+
+source "drivers/mca/Kconfig"
+
+config SCx200
+	tristate "NatSemi SCx200 support"
+	depends on !X86_VOYAGER
+	help
+	  This provides basic support for the National Semiconductor SCx200
+	  processor.  Right now this is just a driver for the GPIO pins.
+
+	  If you don't know what to do here, say N.
+
+	  This support is also available as a module.  If compiled as a
+	  module, it will be called scx200.
+
+source "drivers/pcmcia/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/i386/oprofile/Kconfig"
+
+source "arch/i386/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config GENERIC_IRQ_PROBE
+	bool
+	default y
+
+config X86_SMP
+	bool
+	depends on SMP && !X86_VOYAGER
+	default y
+
+config X86_HT
+	bool
+	depends on SMP && !(X86_VISWS || X86_VOYAGER)
+	default y
+
+config X86_BIOS_REBOOT
+	bool
+	depends on !(X86_VISWS || X86_VOYAGER)
+	default y
+
+config X86_TRAMPOLINE
+	bool
+	depends on X86_SMP || (X86_VOYAGER && SMP)
+	default y
+
+config PC
+	bool
+	depends on X86 && !EMBEDDED
+	default y
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
new file mode 100644
index 00000000000..bfb2064f710
--- /dev/null
+++ b/arch/i386/Kconfig.debug
@@ -0,0 +1,72 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+	bool "Early printk" if EMBEDDED && DEBUG_KERNEL
+	default y
+	help
+	  Write kernel log output directly into the VGA buffer or to a serial
+	  port.
+
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized. For normal operation
+	  it is not recommended because it looks ugly and doesn't cooperate
+	  with klogd/syslogd or the X server. You should normally N here,
+	  unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+
+config KPROBES
+	bool "Kprobes"
+	depends on DEBUG_KERNEL
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+config DEBUG_STACK_USAGE
+	bool "Stack utilization instrumentation"
+	depends on DEBUG_KERNEL
+	help
+	  Enables the display of the minimum amount of free stack which each
+	  task has ever had available in the sysrq-T and sysrq-P debug output.
+
+	  This option will slow down process creation somewhat.
+
+comment "Page alloc debug is incompatible with Software Suspend on i386"
+	depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
+
+config DEBUG_PAGEALLOC
+	bool "Page alloc debugging"
+	depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a large slowdown, but helps to find certain types
+	  of memory corruptions.
+
+config 4KSTACKS
+	bool "Use 4Kb for kernel stacks instead of 8Kb"
+	depends on DEBUG_KERNEL
+	help
+	  If you say Y here the kernel will use a 4Kb stacksize for the
+	  kernel stack attached to each process/thread. This facilitates
+	  running more threads on a system and also reduces the pressure
+	  on the VM subsystem for higher order allocations. This option
+	  will also use IRQ stacks to compensate for the reduced stackspace.
+
+config X86_FIND_SMP_CONFIG
+	bool
+	depends on X86_LOCAL_APIC || X86_VOYAGER
+	default y
+
+config X86_MPPARSE
+	bool
+	depends on X86_LOCAL_APIC && !X86_VISWS
+	default y
+
+endmenu
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
new file mode 100644
index 00000000000..314c7146e9b
--- /dev/null
+++ b/arch/i386/Makefile
@@ -0,0 +1,173 @@
+#
+# i386/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" cleaning up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@geocities.com>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+#
+#           Kianusch Sayah Karadji <kianusch@sk-tech.net>
+#           Added support for GEODE CPU
+
+LDFLAGS		:= -m elf_i386
+OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
+LDFLAGS_vmlinux :=
+CHECKFLAGS	+= -D__i386__
+
+CFLAGS += -pipe -msoft-float
+
+# prevent gcc from keeping the stack 16 byte aligned
+CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+
+align := $(cc-option-align)
+cflags-$(CONFIG_M386)		+= -march=i386
+cflags-$(CONFIG_M486)		+= -march=i486
+cflags-$(CONFIG_M586)		+= -march=i586
+cflags-$(CONFIG_M586TSC)	+= -march=i586
+cflags-$(CONFIG_M586MMX)	+= $(call cc-option,-march=pentium-mmx,-march=i586)
+cflags-$(CONFIG_M686)		+= -march=i686
+cflags-$(CONFIG_MPENTIUMII)	+= -march=i686 $(call cc-option,-mtune=pentium2)
+cflags-$(CONFIG_MPENTIUMIII)	+= -march=i686 $(call cc-option,-mtune=pentium3)
+cflags-$(CONFIG_MPENTIUMM)	+= -march=i686 $(call cc-option,-mtune=pentium3)
+cflags-$(CONFIG_MPENTIUM4)	+= -march=i686 $(call cc-option,-mtune=pentium4)
+cflags-$(CONFIG_MK6)		+= -march=k6
+# Please note, that patches that add -march=athlon-xp and friends are pointless.
+# They make zero difference whatsosever to performance at this time.
+cflags-$(CONFIG_MK7)		+= $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
+cflags-$(CONFIG_MK8)		+= $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
+cflags-$(CONFIG_MCRUSOE)	+= -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MEFFICEON)	+= -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MWINCHIPC6)	+= $(call cc-option,-march=winchip-c6,-march=i586)
+cflags-$(CONFIG_MWINCHIP2)	+= $(call cc-option,-march=winchip2,-march=i586)
+cflags-$(CONFIG_MWINCHIP3D)	+= $(call cc-option,-march=winchip2,-march=i586)
+cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
+
+# AMD Elan support
+cflags-$(CONFIG_X86_ELAN)	+= -march=i486
+
+# MediaGX aka Geode support
+cflags-$(CONFIG_MGEODE)		+= $(call cc-option,-march=pentium-mmx,-march=i586)
+
+# -mregparm=3 works ok on gcc-3.0 and later
+#
+GCC_VERSION			:= $(call cc-version)
+cflags-$(CONFIG_REGPARM) 	+= $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+
+# Disable unit-at-a-time mode, it makes gcc use a lot more stack
+# due to the lack of sharing of stacklots.
+CFLAGS += $(call cc-option,-fno-unit-at-a-time)
+
+CFLAGS += $(cflags-y)
+
+# Default subarch .c files
+mcore-y  := mach-default
+
+# Voyager subarch support
+mflags-$(CONFIG_X86_VOYAGER)	:= -Iinclude/asm-i386/mach-voyager
+mcore-$(CONFIG_X86_VOYAGER)	:= mach-voyager
+
+# VISWS subarch support
+mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-i386/mach-visws
+mcore-$(CONFIG_X86_VISWS)	:= mach-visws
+
+# NUMAQ subarch support
+mflags-$(CONFIG_X86_NUMAQ)	:= -Iinclude/asm-i386/mach-numaq
+mcore-$(CONFIG_X86_NUMAQ)	:= mach-default
+
+# BIGSMP subarch support
+mflags-$(CONFIG_X86_BIGSMP)	:= -Iinclude/asm-i386/mach-bigsmp
+mcore-$(CONFIG_X86_BIGSMP)	:= mach-default
+
+#Summit subarch support
+mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
+mcore-$(CONFIG_X86_SUMMIT)  := mach-default
+
+# generic subarchitecture
+mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
+mcore-$(CONFIG_X86_GENERICARCH) := mach-default
+core-$(CONFIG_X86_GENERICARCH) += arch/i386/mach-generic/
+
+# ES7000 subarch support
+mflags-$(CONFIG_X86_ES7000)	:= -Iinclude/asm-i386/mach-es7000
+mcore-$(CONFIG_X86_ES7000)	:= mach-default
+core-$(CONFIG_X86_ES7000)	:= arch/i386/mach-es7000/
+
+# default subarch .h files
+mflags-y += -Iinclude/asm-i386/mach-default
+
+head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
+
+libs-y 					+= arch/i386/lib/
+core-y					+= arch/i386/kernel/ \
+					   arch/i386/mm/ \
+					   arch/i386/$(mcore-y)/ \
+					   arch/i386/crypto/
+drivers-$(CONFIG_MATH_EMULATION)	+= arch/i386/math-emu/
+drivers-$(CONFIG_PCI)			+= arch/i386/pci/
+# must be linked after kernel/
+drivers-$(CONFIG_OPROFILE)		+= arch/i386/oprofile/
+drivers-$(CONFIG_PM)			+= arch/i386/power/
+
+CFLAGS += $(mflags-y)
+AFLAGS += $(mflags-y)
+
+boot := arch/i386/boot
+
+.PHONY: zImage bzImage compressed zlilo bzlilo \
+	zdisk bzdisk fdimage fdimage144 fdimage288 install
+
+all: bzImage
+
+# KBUILD_IMAGE specify target image being built
+                    KBUILD_IMAGE := $(boot)/bzImage
+zImage zlilo zdisk: KBUILD_IMAGE := arch/i386/boot/zImage
+
+zImage bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+
+compressed: zImage
+
+zlilo bzlilo: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
+
+zdisk bzdisk: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
+
+fdimage fdimage144 fdimage288: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+install:
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+prepare: include/asm-$(ARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
+
+arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
+				   include/config/MARKER
+
+include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+	$(call filechk,gen-asm-offsets)
+
+archclean:
+	$(Q)$(MAKE) $(clean)=arch/i386/boot
+
+define archhelp
+  echo  '* bzImage	- Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
+  echo  '  install	- Install kernel using'
+  echo  '		   (your) ~/bin/installkernel or'
+  echo  '		   (distribution) /sbin/installkernel or'
+  echo  '		   install to $$(INSTALL_PATH) and run lilo'
+  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
+  echo  '  fdimage      - Create a boot floppy image'
+endef
+
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
new file mode 100644
index 00000000000..aa7064a75ee
--- /dev/null
+++ b/arch/i386/boot/Makefile
@@ -0,0 +1,104 @@
+#
+# arch/i386/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+
+# ROOT_DEV specifies the default root-device when making the image.
+# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
+# the default of FLOPPY is used by 'build'.
+
+ROOT_DEV := CURRENT
+
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+
+SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
+
+# If you want the RAM disk device, define this to be the size in blocks.
+
+#RAMDISK := -DRAMDISK=512
+
+targets		:= vmlinux.bin bootsect bootsect.o setup setup.o \
+		   zImage bzImage
+subdir- 	:= compressed
+
+hostprogs-y	:= tools/build
+
+HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+
+# ---------------------------------------------------------------------------
+
+$(obj)/zImage:  IMAGE_OFFSET := 0x1000
+$(obj)/zImage:  EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK)
+$(obj)/bzImage: IMAGE_OFFSET := 0x100000
+$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
+$(obj)/bzImage: BUILDFLAGS   := -b
+
+quiet_cmd_image = BUILD   $@
+cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
+	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+
+$(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
+			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+	$(call if_changed,image)
+	@echo 'Kernel: $@ is ready'
+
+$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+
+LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
+LDFLAGS_setup	 := -Ttext 0x0 -s --oformat binary -e begtext
+
+$(obj)/setup $(obj)/bootsect: %: %.o FORCE
+	$(call if_changed,ld)
+
+$(obj)/compressed/vmlinux: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
+
+# Set this if you want to pass append arguments to the zdisk/fdimage kernel
+FDARGS = 
+
+$(obj)/mtools.conf: $(src)/mtools.conf.in
+	sed -e 's|@OBJ@|$(obj)|g' < $< > $@
+
+# This requires write access to /dev/fd0
+zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
+	MTOOLSRC=$(obj)/mtools.conf mformat a:			; sync
+	syslinux /dev/fd0					; sync
+	echo 'default linux $(FDARGS)' | \
+		MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux	; sync
+
+# These require being root or having syslinux 2.02 or higher installed
+fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
+	dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
+	MTOOLSRC=$(obj)/mtools.conf mformat v:			; sync
+	syslinux $(obj)/fdimage					; sync
+	echo 'default linux $(FDARGS)' | \
+		MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux	; sync
+
+fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
+	dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
+	MTOOLSRC=$(obj)/mtools.conf mformat w:			; sync
+	syslinux $(obj)/fdimage					; sync
+	echo 'default linux $(FDARGS)' | \
+		MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux	; sync
+
+zlilo: $(BOOTIMAGE)
+	if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
+	if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
+	cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz
+	cp System.map $(INSTALL_PATH)/
+	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
+
+install: $(BOOTIMAGE)
+	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S
new file mode 100644
index 00000000000..ba9fe14db6a
--- /dev/null
+++ b/arch/i386/boot/bootsect.S
@@ -0,0 +1,98 @@
+/*
+ *	bootsect.S		Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ *	modified by Drew Eckhardt
+ *	modified by Bruce Evans (bde)
+ *	modified by Chris Noe (May 1999) (as86 -> gas)
+ *	gutted by H. Peter Anvin (Jan 2003)
+ *
+ * BIG FAT NOTE: We're in real mode using 64k segments.  Therefore segment
+ * addresses must be multiplied by 16 to obtain their respective linear
+ * addresses. To avoid confusion, linear addresses are written using leading
+ * hex while segment addresses are written as segment:offset.
+ *
+ */
+
+#include <asm/boot.h>
+
+SETUPSECTS	= 4			/* default nr of setup-sectors */
+BOOTSEG		= 0x07C0		/* original address of boot-sector */
+INITSEG		= DEF_INITSEG		/* we move boot here - out of the way */
+SETUPSEG	= DEF_SETUPSEG		/* setup starts here */
+SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
+SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
+					/* to be loaded */
+ROOT_DEV	= 0 			/* ROOT_DEV is now written by "build" */
+SWAP_DEV	= 0			/* SWAP_DEV is now written by "build" */
+
+#ifndef SVGA_MODE
+#define SVGA_MODE ASK_VGA
+#endif
+
+#ifndef RAMDISK
+#define RAMDISK 0
+#endif
+
+#ifndef ROOT_RDONLY
+#define ROOT_RDONLY 1
+#endif
+
+.code16
+.text
+
+.global _start
+_start:
+
+	# Normalize the start address
+	jmpl	$BOOTSEG, $start2
+
+start2:
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %ss
+	movw	$0x7c00, %sp
+	sti
+	cld
+
+	movw	$bugger_off_msg, %si
+
+msg_loop:
+	lodsb
+	andb	%al, %al
+	jz	die
+	movb	$0xe, %ah
+	movw	$7, %bx
+	int	$0x10
+	jmp	msg_loop
+
+die:
+	# Allow the user to press a key, then reboot
+	xorw	%ax, %ax
+	int	$0x16
+	int	$0x19
+
+	# int 0x19 should never return.  In case it does anyway,
+	# invoke the BIOS reset code...
+	ljmp	$0xf000,$0xfff0
+
+
+bugger_off_msg:
+	.ascii	"Direct booting from floppy is no longer supported.\r\n"
+	.ascii	"Please use a boot loader program instead.\r\n"
+	.ascii	"\n"
+	.ascii	"Remove disk and press any key to reboot . . .\r\n"
+	.byte	0
+	
+
+	# Kernel attributes; used by setup
+
+	.org 497
+setup_sects:	.byte SETUPSECTS
+root_flags:	.word ROOT_RDONLY
+syssize:	.word SYSSIZE
+swap_dev:	.word SWAP_DEV
+ram_size:	.word RAMDISK
+vid_mode:	.word SVGA_MODE
+root_dev:	.word ROOT_DEV
+boot_flag:	.word 0xAA55
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
new file mode 100644
index 00000000000..258ea95224f
--- /dev/null
+++ b/arch/i386/boot/compressed/Makefile
@@ -0,0 +1,25 @@
+#
+# linux/arch/i386/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
+EXTRA_AFLAGS	:= -traditional
+
+LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
+
+$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,ld)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
new file mode 100644
index 00000000000..c5e80b69e7d
--- /dev/null
+++ b/arch/i386/boot/compressed/head.S
@@ -0,0 +1,128 @@
+/*
+ *  linux/boot/head.S
+ *
+ *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
+ */
+
+/*
+ *  head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in 
+ * laptops may need to access the BIOS data stored there.  This is also
+ * useful for future device drivers that either access the BIOS via VM86 
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+.text
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+
+	.globl startup_32
+	
+startup_32:
+	cld
+	cli
+	movl $(__BOOT_DS),%eax
+	movl %eax,%ds
+	movl %eax,%es
+	movl %eax,%fs
+	movl %eax,%gs
+
+	lss stack_start,%esp
+	xorl %eax,%eax
+1:	incl %eax		# check that A20 really IS enabled
+	movl %eax,0x000000	# loop forever if it isn't
+	cmpl %eax,0x100000
+	je 1b
+
+/*
+ * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+	pushl $0
+	popfl
+/*
+ * Clear BSS
+ */
+	xorl %eax,%eax
+	movl $_edata,%edi
+	movl $_end,%ecx
+	subl %edi,%ecx
+	cld
+	rep
+	stosb
+/*
+ * Do the decompression, and jump to the new kernel..
+ */
+	subl $16,%esp	# place for structure on the stack
+	movl %esp,%eax
+	pushl %esi	# real mode pointer as second arg
+	pushl %eax	# address of structure as first arg
+	call decompress_kernel
+	orl  %eax,%eax 
+	jnz  3f
+	popl %esi	# discard address
+	popl %esi	# real mode pointer
+	xorl %ebx,%ebx
+	ljmp $(__BOOT_CS), $0x100000
+
+/*
+ * We come here, if we were loaded high.
+ * We need to move the move-in-place routine down to 0x1000
+ * and then start it with the buffer addresses in registers,
+ * which we got from the stack.
+ */
+3:
+	movl $move_routine_start,%esi
+	movl $0x1000,%edi
+	movl $move_routine_end,%ecx
+	subl %esi,%ecx
+	addl $3,%ecx
+	shrl $2,%ecx
+	cld
+	rep
+	movsl
+
+	popl %esi	# discard the address
+	popl %ebx	# real mode pointer
+	popl %esi	# low_buffer_start
+	popl %ecx	# lcount
+	popl %edx	# high_buffer_start
+	popl %eax	# hcount
+	movl $0x100000,%edi
+	cli		# make sure we don't get interrupted
+	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
+
+/*
+ * Routine (template) for moving the decompressed kernel in place,
+ * if we were high loaded. This _must_ PIC-code !
+ */
+move_routine_start:
+	movl %ecx,%ebp
+	shrl $2,%ecx
+	rep
+	movsl
+	movl %ebp,%ecx
+	andl $3,%ecx
+	rep
+	movsb
+	movl %edx,%esi
+	movl %eax,%ecx	# NOTE: rep movsb won't move if %ecx == 0
+	addl $3,%ecx
+	shrl $2,%ecx
+	rep
+	movsl
+	movl %ebx,%esi	# Restore setup pointer
+	xorl %ebx,%ebx
+	ljmp $(__BOOT_CS), $0x100000
+move_routine_end:
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
new file mode 100644
index 00000000000..fa67045234a
--- /dev/null
+++ b/arch/i386/boot/compressed/misc.c
@@ -0,0 +1,382 @@
+/*
+ * misc.c
+ * 
+ * This is a collection of several routines from gzip-1.0.3 
+ * adapted for Linux.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ * puts by Nick Holloway 1993, better puts by Martin Mares 1995
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+
+#include <linux/linkage.h>
+#include <linux/vmalloc.h>
+#include <linux/tty.h>
+#include <video/edid.h>
+#include <asm/io.h>
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+#define STATIC static
+
+#undef memset
+#undef memcpy
+
+/*
+ * Why do we do this? Don't ask me..
+ *
+ * Incomprehensible are the ways of bootloaders.
+ */
+static void* memset(void *, int, size_t);
+static void* memcpy(void *, __const void *, size_t);
+#define memzero(s, n)     memset ((s), 0, (n))
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define WSIZE 0x8000		/* Window size must be at least 32k, */
+				/* and a power of two */
+
+static uch *inbuf;	     /* input buffer */
+static uch window[WSIZE];    /* Sliding window buffer */
+
+static unsigned insize = 0;  /* valid bytes in inbuf */
+static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt = 0;  /* bytes in output buffer */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
+#define RESERVED     0xC0 /* bit 6,7:   reserved */
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+		
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+  
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+static unsigned char *real_mode; /* Pointer to real-mode data */
+
+#define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
+#ifndef STANDARD_MEMORY_BIOS_CALL
+#define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
+#endif
+#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+
+extern char input_data[];
+extern int input_len;
+
+static long bytes_out = 0;
+static uch *output_data;
+static unsigned long output_ptr = 0;
+
+static void *malloc(int size);
+static void free(void *where);
+
+static void putstr(const char *);
+
+extern int end;
+static long free_mem_ptr = (long)&end;
+static long free_mem_end_ptr;
+
+#define INPLACE_MOVE_ROUTINE  0x1000
+#define LOW_BUFFER_START      0x2000
+#define LOW_BUFFER_MAX       0x90000
+#define HEAP_SIZE             0x3000
+static unsigned int low_buffer_end, low_buffer_size;
+static int high_loaded =0;
+static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+
+static char *vidmem = (char *)0xb8000;
+static int vidport;
+static int lines, cols;
+
+#ifdef CONFIG_X86_NUMAQ
+static void * xquad_portio = NULL;
+#endif
+
+#include "../../../../lib/inflate.c"
+
+static void *malloc(int size)
+{
+	void *p;
+
+	if (size <0) error("Malloc error");
+	if (free_mem_ptr <= 0) error("Memory error");
+
+	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
+
+	p = (void *)free_mem_ptr;
+	free_mem_ptr += size;
+
+	if (free_mem_ptr >= free_mem_end_ptr)
+		error("Out of memory");
+
+	return p;
+}
+
+static void free(void *where)
+{	/* Don't care */
+}
+
+static void gzip_mark(void **ptr)
+{
+	*ptr = (void *) free_mem_ptr;
+}
+
+static void gzip_release(void **ptr)
+{
+	free_mem_ptr = (long) *ptr;
+}
+ 
+static void scroll(void)
+{
+	int i;
+
+	memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
+	for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
+		vidmem[i] = ' ';
+}
+
+static void putstr(const char *s)
+{
+	int x,y,pos;
+	char c;
+
+	x = RM_SCREEN_INFO.orig_x;
+	y = RM_SCREEN_INFO.orig_y;
+
+	while ( ( c = *s++ ) != '\0' ) {
+		if ( c == '\n' ) {
+			x = 0;
+			if ( ++y >= lines ) {
+				scroll();
+				y--;
+			}
+		} else {
+			vidmem [ ( x + cols * y ) * 2 ] = c; 
+			if ( ++x >= cols ) {
+				x = 0;
+				if ( ++y >= lines ) {
+					scroll();
+					y--;
+				}
+			}
+		}
+	}
+
+	RM_SCREEN_INFO.orig_x = x;
+	RM_SCREEN_INFO.orig_y = y;
+
+	pos = (x + cols * y) * 2;	/* Update cursor position */
+	outb_p(14, vidport);
+	outb_p(0xff & (pos >> 9), vidport+1);
+	outb_p(15, vidport);
+	outb_p(0xff & (pos >> 1), vidport+1);
+}
+
+static void* memset(void* s, int c, size_t n)
+{
+	int i;
+	char *ss = (char*)s;
+
+	for (i=0;i<n;i++) ss[i] = c;
+	return s;
+}
+
+static void* memcpy(void* __dest, __const void* __src,
+			    size_t __n)
+{
+	int i;
+	char *d = (char *)__dest, *s = (char *)__src;
+
+	for (i=0;i<__n;i++) d[i] = s[i];
+	return __dest;
+}
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int fill_inbuf(void)
+{
+	if (insize != 0) {
+		error("ran out of input data");
+	}
+
+	inbuf = input_data;
+	insize = input_len;
+	inptr = 1;
+	return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+static void flush_window_low(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in, *out, ch;
+    
+    in = window;
+    out = &output_data[output_ptr]; 
+    for (n = 0; n < outcnt; n++) {
+	    ch = *out++ = *in++;
+	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    output_ptr += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void flush_window_high(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in,  ch;
+    in = window;
+    for (n = 0; n < outcnt; n++) {
+	ch = *output_data++ = *in++;
+	if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
+	c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void flush_window(void)
+{
+	if (high_loaded) flush_window_high();
+	else flush_window_low();
+}
+
+static void error(char *x)
+{
+	putstr("\n\n");
+	putstr(x);
+	putstr("\n\n -- System halted");
+
+	while(1);	/* Halt */
+}
+
+#define STACK_SIZE (4096)
+
+long user_stack [STACK_SIZE];
+
+struct {
+	long * a;
+	short b;
+	} stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
+
+static void setup_normal_output_buffer(void)
+{
+#ifdef STANDARD_MEMORY_BIOS_CALL
+	if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
+#else
+	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
+#endif
+	output_data = (char *)0x100000; /* Points to 1M */
+	free_mem_end_ptr = (long)real_mode;
+}
+
+struct moveparams {
+	uch *low_buffer_start;  int lcount;
+	uch *high_buffer_start; int hcount;
+};
+
+static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
+#ifdef STANDARD_MEMORY_BIOS_CALL
+	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
+#else
+	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
+			(3*1024))
+		error("Less than 4MB of memory");
+#endif	
+	mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
+	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
+	low_buffer_size = low_buffer_end - LOW_BUFFER_START;
+	high_loaded = 1;
+	free_mem_end_ptr = (long)high_buffer_start;
+	if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
+		high_buffer_start = (uch *)(0x100000 + low_buffer_size);
+		mv->hcount = 0; /* say: we need not to move high_buffer */
+	}
+	else mv->hcount = -1;
+	mv->high_buffer_start = high_buffer_start;
+}
+
+static void close_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+	if (bytes_out > low_buffer_size) {
+		mv->lcount = low_buffer_size;
+		if (mv->hcount)
+			mv->hcount = bytes_out - low_buffer_size;
+	} else {
+		mv->lcount = bytes_out;
+		mv->hcount = 0;
+	}
+}
+
+
+asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
+{
+	real_mode = rmode;
+
+	if (RM_SCREEN_INFO.orig_video_mode == 7) {
+		vidmem = (char *) 0xb0000;
+		vidport = 0x3b4;
+	} else {
+		vidmem = (char *) 0xb8000;
+		vidport = 0x3d4;
+	}
+
+	lines = RM_SCREEN_INFO.orig_video_lines;
+	cols = RM_SCREEN_INFO.orig_video_cols;
+
+	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
+	else setup_output_buffer_if_we_run_high(mv);
+
+	makecrc();
+	putstr("Uncompressing Linux... ");
+	gunzip();
+	putstr("Ok, booting the kernel.\n");
+	if (high_loaded) close_output_buffer_if_we_run_high(mv);
+	return high_loaded;
+}
diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr
new file mode 100644
index 00000000000..1ed9d791f86
--- /dev/null
+++ b/arch/i386/boot/compressed/vmlinux.scr
@@ -0,0 +1,9 @@
+SECTIONS
+{
+  .data : { 
+	input_len = .;
+	LONG(input_data_end - input_data) input_data = .; 
+	*(.data) 
+	input_data_end = .; 
+	}
+}
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
new file mode 100644
index 00000000000..027d6b354ff
--- /dev/null
+++ b/arch/i386/boot/edd.S
@@ -0,0 +1,176 @@
+/*
+ * BIOS Enhanced Disk Drive support
+ * Copyright (C) 2002, 2003, 2004 Dell, Inc.
+ * by Matt Domsch <Matt_Domsch@dell.com> October 2002
+ * conformant to T13 Committee www.t13.org
+ *   projects 1572D, 1484D, 1386D, 1226DT
+ * disk signature read by Matt Domsch <Matt_Domsch@dell.com>
+ *	and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004
+ * legacy CHS retreival by Patrick J. LoPresti <patl@users.sourceforge.net>
+ *      March 2004
+ * Command line option parsing, Matt Domsch, November 2004
+ */
+
+#include <linux/edd.h>
+#include <asm/setup.h>
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+	movb	$0, (EDD_MBR_SIG_NR_BUF)
+	movb	$0, (EDDNR)
+
+# Check the command line for two options:
+# edd=of  disables EDD completely  (edd=off)
+# edd=sk  skips the MBR test    (edd=skipmbr)
+	pushl	%esi
+    	cmpl	$0, %cs:cmd_line_ptr
+	jz	done_cl
+	movl	%cs:(cmd_line_ptr), %esi
+# ds:esi has the pointer to the command line now
+	movl	$(COMMAND_LINE_SIZE-7), %ecx
+# loop through kernel command line one byte at a time
+cl_loop:
+	cmpl	$EDD_CL_EQUALS, (%si)
+	jz	found_edd_equals
+	incl	%esi
+	loop	cl_loop
+	jmp	done_cl
+found_edd_equals:
+# only looking at first two characters after equals
+    	addl	$4, %esi
+	cmpw	$EDD_CL_OFF, (%si)	# edd=of
+	jz	do_edd_off
+	cmpw	$EDD_CL_SKIP, (%si)	# edd=sk
+	jz	do_edd_skipmbr
+	jmp	done_cl
+do_edd_skipmbr:
+    	popl	%esi
+	jmp	edd_start
+do_edd_off:
+	popl	%esi
+	jmp	edd_done
+done_cl:
+	popl	%esi
+
+
+# Read the first sector of each BIOS disk device and store the 4-byte signature
+edd_mbr_sig_start:
+	movb	$0x80, %dl			# from device 80
+	movw	$EDD_MBR_SIG_BUF, %bx		# store buffer ptr in bx
+edd_mbr_sig_read:
+	movl	$0xFFFFFFFF, %eax
+	movl	%eax, (%bx)			# assume failure
+	pushw	%bx
+	movb	$READ_SECTORS, %ah
+	movb	$1, %al				# read 1 sector
+	movb	$0, %dh				# at head 0
+	movw	$1, %cx				# cylinder 0, sector 0
+	pushw	%es
+	pushw	%ds
+	popw	%es
+    	movw	$EDDBUF, %bx			# disk's data goes into EDDBUF
+	pushw	%dx             # work around buggy BIOSes
+	stc                     # work around buggy BIOSes
+	int	$0x13
+	sti                     # work around buggy BIOSes
+	popw	%dx
+	popw	%es
+	popw	%bx
+	jc	edd_mbr_sig_done		# on failure, we're done.
+	movl	(EDDBUF+EDD_MBR_SIG_OFFSET), %eax # read sig out of the MBR
+	movl	%eax, (%bx)			# store success
+	incb	(EDD_MBR_SIG_NR_BUF)		# note that we stored something
+	incb	%dl				# increment to next device
+	addw	$4, %bx				# increment sig buffer ptr
+	cmpb	$EDD_MBR_SIG_MAX, (EDD_MBR_SIG_NR_BUF)	# Out of space?
+	jb	edd_mbr_sig_read		# keep looping
+edd_mbr_sig_done:
+
+# Do the BIOS Enhanced Disk Drive calls
+# This consists of two calls:
+#    int 13h ah=41h "Check Extensions Present"
+#    int 13h ah=48h "Get Device Parameters"
+#    int 13h ah=08h "Legacy Get Device Parameters"
+#
+# A buffer of size EDDMAXNR*(EDDEXTSIZE+EDDPARMSIZE) is reserved for our use
+# in the boot_params at EDDBUF.  The first four bytes of which are
+# used to store the device number, interface support map and version
+# results from fn41.  The next four bytes are used to store the legacy
+# cylinders, heads, and sectors from fn08. The following 74 bytes are used to
+# store the results from fn48.  Starting from device 80h, fn41, then fn48
+# are called and their results stored in EDDBUF+n*(EDDEXTSIZE+EDDPARMIZE).
+# Then the pointer is incremented to store the data for the next call.
+# This repeats until either a device doesn't exist, or until EDDMAXNR
+# devices have been stored.
+# The one tricky part is that ds:si always points EDDEXTSIZE bytes into
+# the structure, and the fn41 and fn08 results are stored at offsets
+# from there.  This removes the need to increment the pointer for
+# every store, and leaves it ready for the fn48 call.
+# A second one-byte buffer, EDDNR, in the boot_params stores
+# the number of BIOS devices which exist, up to EDDMAXNR.
+# In setup.c, copy_edd() stores both boot_params buffers away
+# for later use, as they would get overwritten otherwise.
+# This code is sensitive to the size of the structs in edd.h
+edd_start:
+						# %ds points to the bootsector
+       						# result buffer for fn48
+	movw	$EDDBUF+EDDEXTSIZE, %si		# in ds:si, fn41 results
+						# kept just before that
+	movb	$0x80, %dl			# BIOS device 0x80
+
+edd_check_ext:
+	movb	$CHECKEXTENSIONSPRESENT, %ah    # Function 41
+	movw	$EDDMAGIC1, %bx			# magic
+	int	$0x13				# make the call
+	jc	edd_done			# no more BIOS devices
+
+	cmpw	$EDDMAGIC2, %bx			# is magic right?
+	jne	edd_next			# nope, next...
+
+	movb	%dl, %ds:-8(%si)		# store device number
+	movb	%ah, %ds:-7(%si)		# store version
+	movw	%cx, %ds:-6(%si)		# store extensions
+	incb	(EDDNR)				# note that we stored something
+
+edd_get_device_params:
+	movw	$EDDPARMSIZE, %ds:(%si)		# put size
+	movw	$0x0, %ds:2(%si)		# work around buggy BIOSes
+	movb	$GETDEVICEPARAMETERS, %ah	# Function 48
+	int	$0x13				# make the call
+						# Don't check for fail return
+						# it doesn't matter.
+edd_get_legacy_chs:
+	xorw    %ax, %ax
+	movw    %ax, %ds:-4(%si)
+	movw    %ax, %ds:-2(%si)
+        # Ralf Brown's Interrupt List says to set ES:DI to
+	# 0000h:0000h "to guard against BIOS bugs"
+	pushw   %es
+	movw    %ax, %es
+	movw    %ax, %di
+	pushw   %dx                             # legacy call clobbers %dl
+	movb    $LEGACYGETDEVICEPARAMETERS, %ah # Function 08
+	int     $0x13                           # make the call
+	jc      edd_legacy_done                 # failed
+	movb    %cl, %al                        # Low 6 bits are max
+	andb    $0x3F, %al                      #   sector number
+	movb	%al, %ds:-1(%si)                # Record max sect
+	movb    %dh, %ds:-2(%si)                # Record max head number
+	movb    %ch, %al                        # Low 8 bits of max cyl
+	shr     $6, %cl
+	movb    %cl, %ah                        # High 2 bits of max cyl
+	movw    %ax, %ds:-4(%si)
+
+edd_legacy_done:
+	popw    %dx
+	popw    %es
+	movw	%si, %ax			# increment si
+	addw	$EDDPARMSIZE+EDDEXTSIZE, %ax
+	movw	%ax, %si
+
+edd_next:
+	incb	%dl				# increment to next device
+	cmpb	$EDDMAXNR, (EDDNR) 		# Out of space?
+	jb	edd_check_ext			# keep looping
+
+edd_done:
+#endif
diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh
new file mode 100644
index 00000000000..90f2452b3b9
--- /dev/null
+++ b/arch/i386/boot/install.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# arch/i386/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for i386 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
+if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f $4/vmlinuz ]; then
+	mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+	mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
+
+if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
diff --git a/arch/i386/boot/mtools.conf.in b/arch/i386/boot/mtools.conf.in
new file mode 100644
index 00000000000..efd6d2490c1
--- /dev/null
+++ b/arch/i386/boot/mtools.conf.in
@@ -0,0 +1,17 @@
+#
+# mtools configuration file for "make (b)zdisk"
+#
+
+# Actual floppy drive
+drive a:
+  file="/dev/fd0"
+
+# 1.44 MB floppy disk image
+drive v:
+  file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
+
+# 2.88 MB floppy disk image (mostly for virtual uses)
+drive w:
+  file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
+
+
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
new file mode 100644
index 00000000000..a934ab32bf8
--- /dev/null
+++ b/arch/i386/boot/setup.S
@@ -0,0 +1,1028 @@
+/*
+ *	setup.S		Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * setup.s is responsible for getting the system data from the BIOS,
+ * and putting them into the appropriate places in system memory.
+ * both setup.s and system has been loaded by the bootblock.
+ *
+ * This code asks the bios for memory/disk/other parameters, and
+ * puts them in a "safe" place: 0x90000-0x901FF, ie where the
+ * boot-block used to be. It is then up to the protected mode
+ * system to read them from there before the area is overwritten
+ * for buffer-blocks.
+ *
+ * Move PS/2 aux init code to psaux.c
+ * (troyer@saifr00.cfsat.Honeywell.COM) 03Oct92
+ *
+ * some changes and additional features by Christoph Niemann,
+ * March 1993/June 1994 (Christoph.Niemann@linux.org)
+ *
+ * add APM BIOS checking by Stephen Rothwell, May 1994
+ * (sfr@canb.auug.org.au)
+ *
+ * High load stuff, initrd support and position independency
+ * by Hans Lermen & Werner Almesberger, February 1996
+ * <lermen@elserv.ffm.fgan.de>, <almesber@lrc.epfl.ch>
+ *
+ * Video handling moved to video.S by Martin Mares, March 1996
+ * <mj@k332.feld.cvut.cz>
+ *
+ * Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
+ * parsons) to avoid loadlin confusion, July 1997
+ *
+ * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
+ * <stiker@northlink.com>
+ *
+ * Fix to work around buggy BIOSes which dont use carry bit correctly
+ * and/or report extended memory in CX/DX for e801h memory size detection 
+ * call.  As a result the kernel got wrong figures.  The int15/e801h docs
+ * from Ralf Brown interrupt list seem to indicate AX/BX should be used
+ * anyway.  So to avoid breaking many machines (presumably there was a reason
+ * to orginally use CX/DX instead of AX/BX), we do a kludge to see
+ * if CX/DX have been changed in the e801 call and if so use AX/BX .
+ * Michael Miller, April 2001 <michaelm@mjmm.org>
+ *
+ * New A20 code ported from SYSLINUX by H. Peter Anvin. AMD Elan bugfixes
+ * by Robert Schwebel, December 2001 <robert@schwebel.de>
+ */
+
+#include <linux/config.h>
+#include <asm/segment.h>
+#include <linux/version.h>
+#include <linux/compile.h>
+#include <asm/boot.h>
+#include <asm/e820.h>
+#include <asm/page.h>
+	
+/* Signature words to ensure LILO loaded us right */
+#define SIG1	0xAA55
+#define SIG2	0x5A5A
+
+INITSEG  = DEF_INITSEG		# 0x9000, we move boot here, out of the way
+SYSSEG   = DEF_SYSSEG		# 0x1000, system loaded at 0x10000 (65536).
+SETUPSEG = DEF_SETUPSEG		# 0x9020, this is the current segment
+				# ... and the former contents of CS
+
+DELTA_INITSEG = SETUPSEG - INITSEG	# 0x0020
+
+.code16
+.globl begtext, begdata, begbss, endtext, enddata, endbss
+
+.text
+begtext:
+.data
+begdata:
+.bss
+begbss:
+.text
+
+start:
+	jmp	trampoline
+
+# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
+
+		.ascii	"HdrS"		# header signature
+		.word	0x0203		# header version number (>= 0x0105)
+					# or else old loadlin-1.5 will fail)
+realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
+start_sys_seg:	.word	SYSSEG
+		.word	kernel_version	# pointing to kernel version string
+					# above section of header is compatible
+					# with loadlin-1.5 (header v1.5). Don't
+					# change it.
+
+type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
+					#      Bootlin, SYSLX, bootsect...)
+					# See Documentation/i386/boot.txt for
+					# assigned ids
+	
+# flags, unused bits must be zero (RFU) bit within loadflags
+loadflags:
+LOADED_HIGH	= 1			# If set, the kernel is loaded high
+CAN_USE_HEAP	= 0x80			# If set, the loader also has set
+					# heap_end_ptr to tell how much
+					# space behind setup.S can be used for
+					# heap purposes.
+					# Only the loader knows what is free
+#ifndef __BIG_KERNEL__
+		.byte	0
+#else
+		.byte	LOADED_HIGH
+#endif
+
+setup_move_size: .word  0x8000		# size to move, when setup is not
+					# loaded at 0x90000. We will move setup 
+					# to 0x90000 then just before jumping
+					# into the kernel. However, only the
+					# loader knows how much data behind
+					# us also needs to be loaded.
+
+code32_start:				# here loaders can put a different
+					# start address for 32-bit code.
+#ifndef __BIG_KERNEL__
+		.long	0x1000		#   0x1000 = default for zImage
+#else
+		.long	0x100000	# 0x100000 = default for big kernel
+#endif
+
+ramdisk_image:	.long	0		# address of loaded ramdisk image
+					# Here the loader puts the 32-bit
+					# address where it loaded the image.
+					# This only will be read by the kernel.
+
+ramdisk_size:	.long	0		# its size in bytes
+
+bootsect_kludge:
+		.long	0		# obsolete
+
+heap_end_ptr:	.word	modelist+1024	# (Header version 0x0201 or later)
+					# space from here (exclusive) down to
+					# end of setup code can be used by setup
+					# for local heap purposes.
+
+pad1:		.word	0
+cmd_line_ptr:	.long 0			# (Header version 0x0202 or later)
+					# If nonzero, a 32-bit pointer
+					# to the kernel command line.
+					# The command line should be
+					# located between the start of
+					# setup and the end of low
+					# memory (0xa0000), or it may
+					# get overwritten before it
+					# gets read.  If this field is
+					# used, there is no longer
+					# anything magical about the
+					# 0x90000 segment; the setup
+					# can be located anywhere in
+					# low memory 0x10000 or higher.
+
+ramdisk_max:	.long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
+					# (Header version 0x0203 or later)
+					# The highest safe address for
+					# the contents of an initrd
+
+trampoline:	call	start_of_setup
+		.align 16
+					# The offset at this point is 0x240
+		.space	(0x7ff-0x240+1) # E820 & EDD space (ending at 0x7ff)
+# End of setup header #####################################################
+
+start_of_setup:
+# Bootlin depends on this being done early
+	movw	$0x01500, %ax
+	movb	$0x81, %dl
+	int	$0x13
+
+#ifdef SAFE_RESET_DISK_CONTROLLER
+# Reset the disk controller.
+	movw	$0x0000, %ax
+	movb	$0x80, %dl
+	int	$0x13
+#endif
+
+# Set %ds = %cs, we know that SETUPSEG = %cs at this point
+	movw	%cs, %ax		# aka SETUPSEG
+	movw	%ax, %ds
+# Check signature at end of setup
+	cmpw	$SIG1, setup_sig1
+	jne	bad_sig
+
+	cmpw	$SIG2, setup_sig2
+	jne	bad_sig
+
+	jmp	good_sig1
+
+# Routine to print asciiz string at ds:si
+prtstr:
+	lodsb
+	andb	%al, %al
+	jz	fin
+
+	call	prtchr
+	jmp	prtstr
+
+fin:	ret
+
+# Space printing
+prtsp2:	call	prtspc		# Print double space
+prtspc:	movb	$0x20, %al	# Print single space (note: fall-thru)
+
+# Part of above routine, this one just prints ascii al
+prtchr:	pushw	%ax
+	pushw	%cx
+	movw	$7,%bx
+	movw	$0x01, %cx
+	movb	$0x0e, %ah
+	int	$0x10
+	popw	%cx
+	popw	%ax
+	ret
+
+beep:	movb	$0x07, %al
+	jmp	prtchr
+	
+no_sig_mess: .string	"No setup signature found ..."
+
+good_sig1:
+	jmp	good_sig
+
+# We now have to find the rest of the setup code/data
+bad_sig:
+	movw	%cs, %ax			# SETUPSEG
+	subw	$DELTA_INITSEG, %ax		# INITSEG
+	movw	%ax, %ds
+	xorb	%bh, %bh
+	movb	(497), %bl			# get setup sect from bootsect
+	subw	$4, %bx				# LILO loads 4 sectors of setup
+	shlw	$8, %bx				# convert to words (1sect=2^8 words)
+	movw	%bx, %cx
+	shrw	$3, %bx				# convert to segment
+	addw	$SYSSEG, %bx
+	movw	%bx, %cs:start_sys_seg
+# Move rest of setup code/data to here
+	movw	$2048, %di			# four sectors loaded by LILO
+	subw	%si, %si
+	pushw	%cs
+	popw	%es
+	movw	$SYSSEG, %ax
+	movw	%ax, %ds
+	rep
+	movsw
+	movw	%cs, %ax			# aka SETUPSEG
+	movw	%ax, %ds
+	cmpw	$SIG1, setup_sig1
+	jne	no_sig
+
+	cmpw	$SIG2, setup_sig2
+	jne	no_sig
+
+	jmp	good_sig
+
+no_sig:
+	lea	no_sig_mess, %si
+	call	prtstr
+
+no_sig_loop:
+	hlt
+	jmp	no_sig_loop
+
+good_sig:
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax 		# aka INITSEG
+	movw	%ax, %ds
+# Check if an old loader tries to load a big-kernel
+	testb	$LOADED_HIGH, %cs:loadflags	# Do we have a big kernel?
+	jz	loader_ok			# No, no danger for old loaders.
+
+	cmpb	$0, %cs:type_of_loader 		# Do we have a loader that
+						# can deal with us?
+	jnz	loader_ok			# Yes, continue.
+
+	pushw	%cs				# No, we have an old loader,
+	popw	%ds				# die. 
+	lea	loader_panic_mess, %si
+	call	prtstr
+
+	jmp	no_sig_loop
+
+loader_panic_mess: .string "Wrong loader, giving up..."
+
+loader_ok:
+# Get memory size (extended mem, kB)
+
+	xorl	%eax, %eax
+	movl	%eax, (0x1e0)
+#ifndef STANDARD_MEMORY_BIOS_CALL
+	movb	%al, (E820NR)
+# Try three different memory detection schemes.  First, try
+# e820h, which lets us assemble a memory map, then try e801h,
+# which returns a 32-bit memory size, and finally 88h, which
+# returns 0-64m
+
+# method E820H:
+# the memory map from hell.  e820h returns memory classified into
+# a whole bunch of different types, and allows memory holes and
+# everything.  We scan through this memory map and build a list
+# of the first 32 memory areas, which we return at [E820MAP].
+# This is documented at http://www.acpi.info/, in the ACPI 2.0 specification.
+
+#define SMAP  0x534d4150
+
+meme820:
+	xorl	%ebx, %ebx			# continuation counter
+	movw	$E820MAP, %di			# point into the whitelist
+						# so we can have the bios
+						# directly write into it.
+
+jmpe820:
+	movl	$0x0000e820, %eax		# e820, upper word zeroed
+	movl	$SMAP, %edx			# ascii 'SMAP'
+	movl	$20, %ecx			# size of the e820rec
+	pushw	%ds				# data record.
+	popw	%es
+	int	$0x15				# make the call
+	jc	bail820				# fall to e801 if it fails
+
+	cmpl	$SMAP, %eax			# check the return is `SMAP'
+	jne	bail820				# fall to e801 if it fails
+
+#	cmpl	$1, 16(%di)			# is this usable memory?
+#	jne	again820
+
+	# If this is usable memory, we save it by simply advancing %di by
+	# sizeof(e820rec).
+	#
+good820:
+	movb	(E820NR), %al			# up to 32 entries
+	cmpb	$E820MAX, %al
+	jnl	bail820
+
+	incb	(E820NR)
+	movw	%di, %ax
+	addw	$20, %ax
+	movw	%ax, %di
+again820:
+	cmpl	$0, %ebx			# check to see if
+	jne	jmpe820				# %ebx is set to EOF
+bail820:
+
+
+# method E801H:
+# memory size is in 1k chunksizes, to avoid confusing loadlin.
+# we store the 0xe801 memory size in a completely different place,
+# because it will most likely be longer than 16 bits.
+# (use 1e0 because that's what Larry Augustine uses in his
+# alternative new memory detection scheme, and it's sensible
+# to write everything into the same place.)
+
+meme801:
+	stc					# fix to work around buggy
+	xorw	%cx,%cx				# BIOSes which dont clear/set
+	xorw	%dx,%dx				# carry on pass/error of
+						# e801h memory size call
+						# or merely pass cx,dx though
+						# without changing them.
+	movw	$0xe801, %ax
+	int	$0x15
+	jc	mem88
+
+	cmpw	$0x0, %cx			# Kludge to handle BIOSes
+	jne	e801usecxdx			# which report their extended
+	cmpw	$0x0, %dx			# memory in AX/BX rather than
+	jne	e801usecxdx			# CX/DX.  The spec I have read
+	movw	%ax, %cx			# seems to indicate AX/BX 
+	movw	%bx, %dx			# are more reasonable anyway...
+
+e801usecxdx:
+	andl	$0xffff, %edx			# clear sign extend
+	shll	$6, %edx			# and go from 64k to 1k chunks
+	movl	%edx, (0x1e0)			# store extended memory size
+	andl	$0xffff, %ecx			# clear sign extend
+ 	addl	%ecx, (0x1e0)			# and add lower memory into
+						# total size.
+
+# Ye Olde Traditional Methode.  Returns the memory size (up to 16mb or
+# 64mb, depending on the bios) in ax.
+mem88:
+
+#endif
+	movb	$0x88, %ah
+	int	$0x15
+	movw	%ax, (2)
+
+# Set the keyboard repeat rate to the max
+	movw	$0x0305, %ax
+	xorw	%bx, %bx
+	int	$0x16
+
+# Check for video adapter and its parameters and allow the
+# user to browse video modes.
+	call	video				# NOTE: we need %ds pointing
+						# to bootsector
+
+# Get hd0 data...
+	xorw	%ax, %ax
+	movw	%ax, %ds
+	ldsw	(4 * 0x41), %si
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax		# aka INITSEG
+	pushw	%ax
+	movw	%ax, %es
+	movw	$0x0080, %di
+	movw	$0x10, %cx
+	pushw	%cx
+	cld
+	rep
+ 	movsb
+# Get hd1 data...
+	xorw	%ax, %ax
+	movw	%ax, %ds
+	ldsw	(4 * 0x46), %si
+	popw	%cx
+	popw	%es
+	movw	$0x0090, %di
+	rep
+	movsb
+# Check that there IS a hd1 :-)
+	movw	$0x01500, %ax
+	movb	$0x81, %dl
+	int	$0x13
+	jc	no_disk1
+	
+	cmpb	$3, %ah
+	je	is_disk1
+
+no_disk1:
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax 		# aka INITSEG
+	movw	%ax, %es
+	movw	$0x0090, %di
+	movw	$0x10, %cx
+	xorw	%ax, %ax
+	cld
+	rep
+	stosb
+is_disk1:
+# check for Micro Channel (MCA) bus
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax		# aka INITSEG
+	movw	%ax, %ds
+	xorw	%ax, %ax
+	movw	%ax, (0xa0)			# set table length to 0
+	movb	$0xc0, %ah
+	stc
+	int	$0x15				# moves feature table to es:bx
+	jc	no_mca
+
+	pushw	%ds
+	movw	%es, %ax
+	movw	%ax, %ds
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax		# aka INITSEG
+	movw	%ax, %es
+	movw	%bx, %si
+	movw	$0xa0, %di
+	movw	(%si), %cx
+	addw	$2, %cx				# table length is a short
+	cmpw	$0x10, %cx
+	jc	sysdesc_ok
+
+	movw	$0x10, %cx			# we keep only first 16 bytes
+sysdesc_ok:
+	rep
+	movsb
+	popw	%ds
+no_mca:
+#ifdef CONFIG_X86_VOYAGER
+	movb	$0xff, 0x40	# flag on config found
+	movb	$0xc0, %al
+	mov	$0xff, %ah
+	int	$0x15		# put voyager config info at es:di
+	jc	no_voyager
+	movw	$0x40, %si	# place voyager info in apm table
+	cld
+	movw	$7, %cx
+voyager_rep:
+	movb	%es:(%di), %al
+	movb	%al,(%si)
+	incw	%di
+	incw	%si
+	decw	%cx
+	jnz	voyager_rep
+no_voyager:	
+#endif
+# Check for PS/2 pointing device
+	movw	%cs, %ax			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %ax		# aka INITSEG
+	movw	%ax, %ds
+	movw	$0, (0x1ff)			# default is no pointing device
+	int	$0x11				# int 0x11: equipment list
+	testb	$0x04, %al			# check if mouse installed
+	jz	no_psmouse
+
+	movw	$0xAA, (0x1ff)			# device present
+no_psmouse:
+
+#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
+	movl	$0x0000E980, %eax		# IST Support 
+	movl	$0x47534943, %edx		# Request value
+	int	$0x15
+
+	movl	%eax, (96)
+	movl	%ebx, (100)
+	movl	%ecx, (104)
+	movl	%edx, (108)
+#endif
+
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+# Then check for an APM BIOS...
+						# %ds points to the bootsector
+	movw	$0, 0x40			# version = 0 means no APM BIOS
+	movw	$0x05300, %ax			# APM BIOS installation check
+	xorw	%bx, %bx
+	int	$0x15
+	jc	done_apm_bios			# Nope, no APM BIOS
+	
+	cmpw	$0x0504d, %bx			# Check for "PM" signature
+	jne	done_apm_bios			# No signature, no APM BIOS
+
+	andw	$0x02, %cx			# Is 32 bit supported?
+	je	done_apm_bios			# No 32-bit, no (good) APM BIOS
+
+	movw	$0x05304, %ax			# Disconnect first just in case
+	xorw	%bx, %bx
+	int	$0x15				# ignore return code
+	movw	$0x05303, %ax			# 32 bit connect
+	xorl	%ebx, %ebx
+	xorw	%cx, %cx			# paranoia :-)
+	xorw	%dx, %dx			#   ...
+	xorl	%esi, %esi			#   ...
+	xorw	%di, %di			#   ...
+	int	$0x15
+	jc	no_32_apm_bios			# Ack, error. 
+
+	movw	%ax,  (66)			# BIOS code segment
+	movl	%ebx, (68)			# BIOS entry point offset
+	movw	%cx,  (72)			# BIOS 16 bit code segment
+	movw	%dx,  (74)			# BIOS data segment
+	movl	%esi, (78)			# BIOS code segment lengths
+	movw	%di,  (82)			# BIOS data segment length
+# Redo the installation check as the 32 bit connect
+# modifies the flags returned on some BIOSs
+	movw	$0x05300, %ax			# APM BIOS installation check
+	xorw	%bx, %bx
+	xorw	%cx, %cx			# paranoia
+	int	$0x15
+	jc	apm_disconnect			# error -> shouldn't happen
+
+	cmpw	$0x0504d, %bx			# check for "PM" signature
+	jne	apm_disconnect			# no sig -> shouldn't happen
+
+	movw	%ax, (64)			# record the APM BIOS version
+	movw	%cx, (76)			# and flags
+	jmp	done_apm_bios
+
+apm_disconnect:					# Tidy up
+	movw	$0x05304, %ax			# Disconnect
+	xorw	%bx, %bx
+	int	$0x15				# ignore return code
+
+	jmp	done_apm_bios
+
+no_32_apm_bios:
+	andw	$0xfffd, (76)			# remove 32 bit support bit
+done_apm_bios:
+#endif
+
+#include "edd.S"
+
+# Now we want to move to protected mode ...
+	cmpw	$0, %cs:realmode_swtch
+	jz	rmodeswtch_normal
+
+	lcall	*%cs:realmode_swtch
+
+	jmp	rmodeswtch_end
+
+rmodeswtch_normal:
+        pushw	%cs
+	call	default_switch
+
+rmodeswtch_end:
+# we get the code32 start address and modify the below 'jmpi'
+# (loader may have changed it)
+	movl	%cs:code32_start, %eax
+	movl	%eax, %cs:code32
+
+# Now we move the system to its rightful place ... but we check if we have a
+# big-kernel. In that case we *must* not move it ...
+	testb	$LOADED_HIGH, %cs:loadflags
+	jz	do_move0			# .. then we have a normal low
+						# loaded zImage
+						# .. or else we have a high
+						# loaded bzImage
+	jmp	end_move			# ... and we skip moving
+
+do_move0:
+	movw	$0x100, %ax			# start of destination segment
+	movw	%cs, %bp			# aka SETUPSEG
+	subw	$DELTA_INITSEG, %bp		# aka INITSEG
+	movw	%cs:start_sys_seg, %bx		# start of source segment
+	cld
+do_move:
+	movw	%ax, %es			# destination segment
+	incb	%ah				# instead of add ax,#0x100
+	movw	%bx, %ds			# source segment
+	addw	$0x100, %bx
+	subw	%di, %di
+	subw	%si, %si
+	movw 	$0x800, %cx
+	rep
+	movsw
+	cmpw	%bp, %bx			# assume start_sys_seg > 0x200,
+						# so we will perhaps read one
+						# page more than needed, but
+						# never overwrite INITSEG
+						# because destination is a
+						# minimum one page below source
+	jb	do_move
+
+end_move:
+# then we load the segment descriptors
+	movw	%cs, %ax			# aka SETUPSEG
+	movw	%ax, %ds
+		
+# Check whether we need to be downward compatible with version <=201
+	cmpl	$0, cmd_line_ptr
+	jne	end_move_self		# loader uses version >=202 features
+	cmpb	$0x20, type_of_loader
+	je	end_move_self		# bootsect loader, we know of it
+
+# Boot loader doesnt support boot protocol version 2.02.
+# If we have our code not at 0x90000, we need to move it there now.
+# We also then need to move the params behind it (commandline)
+# Because we would overwrite the code on the current IP, we move
+# it in two steps, jumping high after the first one.
+	movw	%cs, %ax
+	cmpw	$SETUPSEG, %ax
+	je	end_move_self
+
+	cli					# make sure we really have
+						# interrupts disabled !
+						# because after this the stack
+						# should not be used
+	subw	$DELTA_INITSEG, %ax		# aka INITSEG
+	movw	%ss, %dx
+	cmpw	%ax, %dx
+	jb	move_self_1
+
+	addw	$INITSEG, %dx
+	subw	%ax, %dx			# this will go into %ss after
+						# the move
+move_self_1:
+	movw	%ax, %ds
+	movw	$INITSEG, %ax			# real INITSEG
+	movw	%ax, %es
+	movw	%cs:setup_move_size, %cx
+	std					# we have to move up, so we use
+						# direction down because the
+						# areas may overlap
+	movw	%cx, %di
+	decw	%di
+	movw	%di, %si
+	subw	$move_self_here+0x200, %cx
+	rep
+	movsb
+	ljmp	$SETUPSEG, $move_self_here
+
+move_self_here:
+	movw	$move_self_here+0x200, %cx
+	rep
+	movsb
+	movw	$SETUPSEG, %ax
+	movw	%ax, %ds
+	movw	%dx, %ss
+end_move_self:					# now we are at the right place
+
+#
+# Enable A20.  This is at the very best an annoying procedure.
+# A20 code ported from SYSLINUX 1.52-1.63 by H. Peter Anvin.
+# AMD Elan bug fix by Robert Schwebel.
+#
+
+#if defined(CONFIG_X86_ELAN)
+	movb $0x02, %al			# alternate A20 gate
+	outb %al, $0x92			# this works on SC410/SC520
+a20_elan_wait:
+	call a20_test
+	jz a20_elan_wait
+	jmp a20_done
+#endif
+
+
+A20_TEST_LOOPS		=  32		# Iterations per wait
+A20_ENABLE_LOOPS	= 255		# Total loops to try		
+
+
+#ifndef CONFIG_X86_VOYAGER
+a20_try_loop:
+
+	# First, see if we are on a system with no A20 gate.
+a20_none:
+	call	a20_test
+	jnz	a20_done
+
+	# Next, try the BIOS (INT 0x15, AX=0x2401)
+a20_bios:
+	movw	$0x2401, %ax
+	pushfl					# Be paranoid about flags
+	int	$0x15
+	popfl
+
+	call	a20_test
+	jnz	a20_done
+
+	# Try enabling A20 through the keyboard controller
+#endif /* CONFIG_X86_VOYAGER */
+a20_kbc:
+	call	empty_8042
+
+#ifndef CONFIG_X86_VOYAGER
+	call	a20_test			# Just in case the BIOS worked
+	jnz	a20_done			# but had a delayed reaction.
+#endif
+
+	movb	$0xD1, %al			# command write
+	outb	%al, $0x64
+	call	empty_8042
+
+	movb	$0xDF, %al			# A20 on
+	outb	%al, $0x60
+	call	empty_8042
+
+#ifndef CONFIG_X86_VOYAGER
+	# Wait until a20 really *is* enabled; it can take a fair amount of
+	# time on certain systems; Toshiba Tecras are known to have this
+	# problem.
+a20_kbc_wait:
+	xorw	%cx, %cx
+a20_kbc_wait_loop:
+	call	a20_test
+	jnz	a20_done
+	loop	a20_kbc_wait_loop
+
+	# Final attempt: use "configuration port A"
+a20_fast:
+	inb	$0x92, %al			# Configuration Port A
+	orb	$0x02, %al			# "fast A20" version
+	andb	$0xFE, %al			# don't accidentally reset
+	outb	%al, $0x92
+
+	# Wait for configuration port A to take effect
+a20_fast_wait:
+	xorw	%cx, %cx
+a20_fast_wait_loop:
+	call	a20_test
+	jnz	a20_done
+	loop	a20_fast_wait_loop
+
+	# A20 is still not responding.  Try frobbing it again.
+	# 
+	decb	(a20_tries)
+	jnz	a20_try_loop
+	
+	movw	$a20_err_msg, %si
+	call	prtstr
+
+a20_die:
+	hlt
+	jmp	a20_die
+
+a20_tries:
+	.byte	A20_ENABLE_LOOPS
+
+a20_err_msg:
+	.ascii	"linux: fatal error: A20 gate not responding!"
+	.byte	13, 10, 0
+
+	# If we get here, all is good
+a20_done:
+
+#endif /* CONFIG_X86_VOYAGER */
+# set up gdt and idt
+	lidt	idt_48				# load idt with 0,0
+	xorl	%eax, %eax			# Compute gdt_base
+	movw	%ds, %ax			# (Convert %ds:gdt to a linear ptr)
+	shll	$4, %eax
+	addl	$gdt, %eax
+	movl	%eax, (gdt_48+2)
+	lgdt	gdt_48				# load gdt with whatever is
+						# appropriate
+
+# make sure any possible coprocessor is properly reset..
+	xorw	%ax, %ax
+	outb	%al, $0xf0
+	call	delay
+
+	outb	%al, $0xf1
+	call	delay
+
+# well, that went ok, I hope. Now we mask all interrupts - the rest
+# is done in init_IRQ().
+	movb	$0xFF, %al			# mask all interrupts for now
+	outb	%al, $0xA1
+	call	delay
+	
+	movb	$0xFB, %al			# mask all irq's but irq2 which
+	outb	%al, $0x21			# is cascaded
+
+# Well, that certainly wasn't fun :-(. Hopefully it works, and we don't
+# need no steenking BIOS anyway (except for the initial loading :-).
+# The BIOS-routine wants lots of unnecessary data, and it's less
+# "interesting" anyway. This is how REAL programmers do it.
+#
+# Well, now's the time to actually move into protected mode. To make
+# things as simple as possible, we do no register set-up or anything,
+# we let the gnu-compiled 32-bit programs do that. We just jump to
+# absolute address 0x1000 (or the loader supplied one),
+# in 32-bit protected mode.
+#
+# Note that the short jump isn't strictly needed, although there are
+# reasons why it might be a good idea. It won't hurt in any case.
+	movw	$1, %ax				# protected mode (PE) bit
+	lmsw	%ax				# This is it!
+	jmp	flush_instr
+
+flush_instr:
+	xorw	%bx, %bx			# Flag to indicate a boot
+	xorl	%esi, %esi			# Pointer to real-mode code
+	movw	%cs, %si
+	subw	$DELTA_INITSEG, %si
+	shll	$4, %esi			# Convert to 32-bit pointer
+
+# jump to startup_32 in arch/i386/boot/compressed/head.S
+#	
+# NOTE: For high loaded big kernels we need a
+#	jmpi    0x100000,__BOOT_CS
+#
+#	but we yet haven't reloaded the CS register, so the default size 
+#	of the target offset still is 16 bit.
+#       However, using an operand prefix (0x66), the CPU will properly
+#	take our 48 bit far pointer. (INTeL 80386 Programmer's Reference
+#	Manual, Mixing 16-bit and 32-bit code, page 16-6)
+
+	.byte 0x66, 0xea			# prefix + jmpi-opcode
+code32:	.long	0x1000				# will be set to 0x100000
+						# for big kernels
+	.word	__BOOT_CS
+
+# Here's a bunch of information about your current kernel..
+kernel_version:	.ascii	UTS_RELEASE
+		.ascii	" ("
+		.ascii	LINUX_COMPILE_BY
+		.ascii	"@"
+		.ascii	LINUX_COMPILE_HOST
+		.ascii	") "
+		.ascii	UTS_VERSION
+		.byte	0
+
+# This is the default real mode switch routine.
+# to be called just before protected mode transition
+default_switch:
+	cli					# no interrupts allowed !
+	movb	$0x80, %al			# disable NMI for bootup
+						# sequence
+	outb	%al, $0x70
+	lret
+
+
+#ifndef CONFIG_X86_VOYAGER
+# This routine tests whether or not A20 is enabled.  If so, it
+# exits with zf = 0.
+#
+# The memory address used, 0x200, is the int $0x80 vector, which
+# should be safe.
+
+A20_TEST_ADDR = 4*0x80
+
+a20_test:
+	pushw	%cx
+	pushw	%ax
+	xorw	%cx, %cx
+	movw	%cx, %fs			# Low memory
+	decw	%cx
+	movw	%cx, %gs			# High memory area
+	movw	$A20_TEST_LOOPS, %cx
+	movw	%fs:(A20_TEST_ADDR), %ax
+	pushw	%ax
+a20_test_wait:
+	incw	%ax
+	movw	%ax, %fs:(A20_TEST_ADDR)
+	call	delay				# Serialize and make delay constant
+	cmpw	%gs:(A20_TEST_ADDR+0x10), %ax
+	loope	a20_test_wait
+
+	popw	%fs:(A20_TEST_ADDR)
+	popw	%ax
+	popw	%cx
+	ret	
+
+#endif /* CONFIG_X86_VOYAGER */
+
+# This routine checks that the keyboard command queue is empty
+# (after emptying the output buffers)
+#
+# Some machines have delusions that the keyboard buffer is always full
+# with no keyboard attached...
+#
+# If there is no keyboard controller, we will usually get 0xff
+# to all the reads.  With each IO taking a microsecond and
+# a timeout of 100,000 iterations, this can take about half a
+# second ("delay" == outb to port 0x80). That should be ok,
+# and should also be plenty of time for a real keyboard controller
+# to empty.
+#
+
+empty_8042:
+	pushl	%ecx
+	movl	$100000, %ecx
+
+empty_8042_loop:
+	decl	%ecx
+	jz	empty_8042_end_loop
+
+	call	delay
+
+	inb	$0x64, %al			# 8042 status port
+	testb	$1, %al				# output buffer?
+	jz	no_output
+
+	call	delay
+	inb	$0x60, %al			# read it
+	jmp	empty_8042_loop
+
+no_output:
+	testb	$2, %al				# is input buffer full?
+	jnz	empty_8042_loop			# yes - loop
+empty_8042_end_loop:
+	popl	%ecx
+	ret
+
+# Read the cmos clock. Return the seconds in al
+gettime:
+	pushw	%cx
+	movb	$0x02, %ah
+	int	$0x1a
+	movb	%dh, %al			# %dh contains the seconds
+	andb	$0x0f, %al
+	movb	%dh, %ah
+	movb	$0x04, %cl
+	shrb	%cl, %ah
+	aad
+	popw	%cx
+	ret
+
+# Delay is needed after doing I/O
+delay:
+	outb	%al,$0x80
+	ret
+
+# Descriptor tables
+#
+# NOTE: The intel manual says gdt should be sixteen bytes aligned for
+# efficiency reasons.  However, there are machines which are known not
+# to boot with misaligned GDTs, so alter this at your peril!  If you alter
+# GDT_ENTRY_BOOT_CS (in asm/segment.h) remember to leave at least two
+# empty GDT entries (one for NULL and one reserved).
+#
+# NOTE:	On some CPUs, the GDT must be 8 byte aligned.  This is
+# true for the Voyager Quad CPU card which will not boot without
+# This directive.  16 byte aligment is recommended by intel.
+#
+	.align 16
+gdt:
+	.fill GDT_ENTRY_BOOT_CS,8,0
+
+	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
+	.word	0				# base address = 0
+	.word	0x9A00				# code read/exec
+	.word	0x00CF				# granularity = 4096, 386
+						#  (+5th nibble of limit)
+
+	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
+	.word	0				# base address = 0
+	.word	0x9200				# data read/write
+	.word	0x00CF				# granularity = 4096, 386
+						#  (+5th nibble of limit)
+gdt_end:
+	.align	4
+	
+	.word	0				# alignment byte
+idt_48:
+	.word	0				# idt limit = 0
+	.word	0, 0				# idt base = 0L
+
+	.word	0				# alignment byte
+gdt_48:
+	.word	gdt_end - gdt - 1		# gdt limit
+	.word	0, 0				# gdt base (filled in later)
+
+# Include video setup & detection code
+
+#include "video.S"
+
+# Setup signature -- must be last
+setup_sig1:	.word	SIG1
+setup_sig2:	.word	SIG2
+
+# After this point, there is some free space which is used by the video mode
+# handling code to store the temporary mode table (not used by the kernel).
+
+modelist:
+
+.text
+endtext:
+.data
+enddata:
+.bss
+endbss:
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
new file mode 100644
index 00000000000..26509b826ae
--- /dev/null
+++ b/arch/i386/boot/tools/build.c
@@ -0,0 +1,184 @@
+/*
+ *  $Id: build.c,v 1.5 1997/05/19 12:29:58 mj Exp $
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1997 Martin Mares
+ */
+
+/*
+ * This file builds a disk-image from three different files:
+ *
+ * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
+ * - setup: 8086 machine code, sets up system parm
+ * - system: 80386 code for actual system
+ *
+ * It does some checking that all files are of the correct type, and
+ * just writes the result to stdout, removing headers and padding to
+ * the right amount. It also writes some system data to stderr.
+ */
+
+/*
+ * Changes by tytso to allow root device specification
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * Cross compiling fixes by Gertjan van Wingerde, July 1996
+ * Rewritten by Martin Mares, April 1997
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <asm/boot.h>
+
+typedef unsigned char byte;
+typedef unsigned short word;
+typedef unsigned long u32;
+
+#define DEFAULT_MAJOR_ROOT 0
+#define DEFAULT_MINOR_ROOT 0
+
+/* Minimal number of setup sectors (see also bootsect.S) */
+#define SETUP_SECTS 4
+
+byte buf[1024];
+int fd;
+int is_big_kernel;
+
+void die(const char * str, ...)
+{
+	va_list args;
+	va_start(args, str);
+	vfprintf(stderr, str, args);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+void file_open(const char *name)
+{
+	if ((fd = open(name, O_RDONLY, 0)) < 0)
+		die("Unable to open `%s': %m", name);
+}
+
+void usage(void)
+{
+	die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+}
+
+int main(int argc, char ** argv)
+{
+	unsigned int i, c, sz, setup_sectors;
+	u32 sys_size;
+	byte major_root, minor_root;
+	struct stat sb;
+
+	if (argc > 2 && !strcmp(argv[1], "-b"))
+	  {
+	    is_big_kernel = 1;
+	    argc--, argv++;
+	  }
+	if ((argc < 4) || (argc > 5))
+		usage();
+	if (argc > 4) {
+		if (!strcmp(argv[4], "CURRENT")) {
+			if (stat("/", &sb)) {
+				perror("/");
+				die("Couldn't stat /");
+			}
+			major_root = major(sb.st_dev);
+			minor_root = minor(sb.st_dev);
+		} else if (strcmp(argv[4], "FLOPPY")) {
+			if (stat(argv[4], &sb)) {
+				perror(argv[4]);
+				die("Couldn't stat root device.");
+			}
+			major_root = major(sb.st_rdev);
+			minor_root = minor(sb.st_rdev);
+		} else {
+			major_root = 0;
+			minor_root = 0;
+		}
+	} else {
+		major_root = DEFAULT_MAJOR_ROOT;
+		minor_root = DEFAULT_MINOR_ROOT;
+	}
+	fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
+
+	file_open(argv[1]);
+	i = read(fd, buf, sizeof(buf));
+	fprintf(stderr,"Boot sector %d bytes.\n",i);
+	if (i != 512)
+		die("Boot block must be exactly 512 bytes");
+	if (buf[510] != 0x55 || buf[511] != 0xaa)
+		die("Boot block hasn't got boot flag (0xAA55)");
+	buf[508] = minor_root;
+	buf[509] = major_root;
+	if (write(1, buf, 512) != 512)
+		die("Write call failed");
+	close (fd);
+
+	file_open(argv[2]);				    /* Copy the setup code */
+	for (i=0 ; (c=read(fd, buf, sizeof(buf)))>0 ; i+=c )
+		if (write(1, buf, c) != c)
+			die("Write call failed");
+	if (c != 0)
+		die("read-error on `setup'");
+	close (fd);
+
+	setup_sectors = (i + 511) / 512;	/* Pad unused space with zeros */
+	/* for compatibility with ancient versions of LILO. */
+	if (setup_sectors < SETUP_SECTS)
+		setup_sectors = SETUP_SECTS;
+	fprintf(stderr, "Setup is %d bytes.\n", i);
+	memset(buf, 0, sizeof(buf));
+	while (i < setup_sectors * 512) {
+		c = setup_sectors * 512 - i;
+		if (c > sizeof(buf))
+			c = sizeof(buf);
+		if (write(1, buf, c) != c)
+			die("Write call failed");
+		i += c;
+	}
+
+	file_open(argv[3]);
+	if (fstat (fd, &sb))
+		die("Unable to stat `%s': %m", argv[3]);
+	sz = sb.st_size;
+	fprintf (stderr, "System is %d kB\n", sz/1024);
+	sys_size = (sz + 15) / 16;
+	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+		die("System is too big. Try using bzImage or modules.");
+	while (sz > 0) {
+		int l, n;
+
+		l = (sz > sizeof(buf)) ? sizeof(buf) : sz;
+		if ((n=read(fd, buf, l)) != l) {
+			if (n < 0)
+				die("Error reading %s: %m", argv[3]);
+			else
+				die("%s: Unexpected EOF", argv[3]);
+		}
+		if (write(1, buf, l) != l)
+			die("Write failed");
+		sz -= l;
+	}
+	close(fd);
+
+	if (lseek(1, 497, SEEK_SET) != 497)		    /* Write sizes to the bootsector */
+		die("Output: seek failed");
+	buf[0] = setup_sectors;
+	if (write(1, buf, 1) != 1)
+		die("Write of setup sector count failed");
+	if (lseek(1, 500, SEEK_SET) != 500)
+		die("Output: seek failed");
+	buf[0] = (sys_size & 0xff);
+	buf[1] = ((sys_size >> 8) & 0xff);
+	if (write(1, buf, 2) != 2)
+		die("Write of image length failed");
+
+	return 0;					    /* Everything is OK */
+}
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
new file mode 100644
index 00000000000..925d3f5a382
--- /dev/null
+++ b/arch/i386/boot/video.S
@@ -0,0 +1,2007 @@
+/*	video.S
+ *
+ *	Display adapter & video mode setup, version 2.13 (14-May-99)
+ *
+ *	Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
+ *	Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
+ *
+ *	Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
+ *
+ *	For further information, look at Documentation/svga.txt.
+ *
+ */
+
+#include <linux/config.h> /* for CONFIG_VIDEO_* */
+
+/* Enable autodetection of SVGA adapters and modes. */
+#undef CONFIG_VIDEO_SVGA
+
+/* Enable autodetection of VESA modes */
+#define CONFIG_VIDEO_VESA
+
+/* Enable compacting of mode table */
+#define CONFIG_VIDEO_COMPACT
+
+/* Retain screen contents when switching modes */
+#define CONFIG_VIDEO_RETAIN
+
+/* Enable local mode list */
+#undef CONFIG_VIDEO_LOCAL
+
+/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
+#undef CONFIG_VIDEO_400_HACK
+
+/* Hack that lets you force specific BIOS mode ID and specific dimensions */
+#undef CONFIG_VIDEO_GFX_HACK
+#define VIDEO_GFX_BIOS_AX 0x4f02	/* 800x600 on ThinkPad */
+#define VIDEO_GFX_BIOS_BX 0x0102
+#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425	/* 100x37 */
+
+/* This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *	NORMAL_VGA (-1)
+ *	EXTENDED_VGA (-2)
+ *	ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
+
+/* Positions of various video parameters passed to the kernel */
+/* (see also include/linux/tty.h) */
+#define PARAM_CURSOR_POS	0x00
+#define PARAM_VIDEO_PAGE	0x04
+#define PARAM_VIDEO_MODE	0x06
+#define PARAM_VIDEO_COLS	0x07
+#define PARAM_VIDEO_EGA_BX	0x0a
+#define PARAM_VIDEO_LINES	0x0e
+#define PARAM_HAVE_VGA		0x0f
+#define PARAM_FONT_POINTS	0x10
+
+#define PARAM_LFB_WIDTH		0x12
+#define PARAM_LFB_HEIGHT	0x14
+#define PARAM_LFB_DEPTH		0x16
+#define PARAM_LFB_BASE		0x18
+#define PARAM_LFB_SIZE		0x1c
+#define PARAM_LFB_LINELENGTH	0x24
+#define PARAM_LFB_COLORS	0x26
+#define PARAM_VESAPM_SEG	0x2e
+#define PARAM_VESAPM_OFF	0x30
+#define PARAM_LFB_PAGES		0x32
+#define PARAM_VESA_ATTRIB	0x34
+
+/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
+#ifdef CONFIG_VIDEO_RETAIN
+#define DO_STORE call store_screen
+#else
+#define DO_STORE
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# This is the main entry point called by setup.S
+# %ds *must* be pointing to the bootsector
+video:	pushw	%ds		# We use different segments
+	pushw	%ds		# FS contains original DS
+	popw	%fs
+	pushw	%cs		# DS is equal to CS
+	popw	%ds
+	pushw	%cs		# ES is equal to CS
+	popw	%es
+	xorw	%ax, %ax
+	movw	%ax, %gs	# GS is zero
+	cld
+	call	basic_detect	# Basic adapter type testing (EGA/VGA/MDA/CGA)
+#ifdef CONFIG_VIDEO_SELECT
+	movw	%fs:(0x01fa), %ax		# User selected video mode
+	cmpw	$ASK_VGA, %ax			# Bring up the menu
+	jz	vid2
+
+	call	mode_set			# Set the mode
+	jc	vid1
+
+	leaw	badmdt, %si			# Invalid mode ID
+	call	prtstr
+vid2:	call	mode_menu
+vid1:
+#ifdef CONFIG_VIDEO_RETAIN
+	call	restore_screen			# Restore screen contents
+#endif /* CONFIG_VIDEO_RETAIN */
+	call	store_edid
+#endif /* CONFIG_VIDEO_SELECT */
+	call	mode_params			# Store mode parameters
+	popw	%ds				# Restore original DS
+	ret
+
+# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
+basic_detect:
+	movb	$0, %fs:(PARAM_HAVE_VGA)
+	movb	$0x12, %ah	# Check EGA/VGA
+	movb	$0x10, %bl
+	int	$0x10
+	movw	%bx, %fs:(PARAM_VIDEO_EGA_BX)	# Identifies EGA to the kernel
+	cmpb	$0x10, %bl			# No, it's a CGA/MDA/HGA card.
+	je	basret
+
+	incb	adapter
+	movw	$0x1a00, %ax			# Check EGA or VGA?
+	int	$0x10
+	cmpb	$0x1a, %al			# 1a means VGA...
+	jne	basret				# anything else is EGA.
+	
+	incb	%fs:(PARAM_HAVE_VGA)		# We've detected a VGA
+	incb	adapter
+basret:	ret
+
+# Store the video mode parameters for later usage by the kernel.
+# This is done by asking the BIOS except for the rows/columns
+# parameters in the default 80x25 mode -- these are set directly,
+# because some very obscure BIOSes supply insane values.
+mode_params:
+#ifdef CONFIG_VIDEO_SELECT
+	cmpb	$0, graphic_mode
+	jnz	mopar_gr
+#endif
+	movb	$0x03, %ah			# Read cursor position
+	xorb	%bh, %bh
+	int	$0x10
+	movw	%dx, %fs:(PARAM_CURSOR_POS)
+	movb	$0x0f, %ah			# Read page/mode/width
+	int	$0x10
+	movw	%bx, %fs:(PARAM_VIDEO_PAGE)
+	movw	%ax, %fs:(PARAM_VIDEO_MODE)	# Video mode and screen width
+	cmpb	$0x7, %al			# MDA/HGA => segment differs
+	jnz	mopar0
+
+	movw	$0xb000, video_segment
+mopar0: movw	%gs:(0x485), %ax		# Font size
+	movw	%ax, %fs:(PARAM_FONT_POINTS)	# (valid only on EGA/VGA)
+	movw	force_size, %ax			# Forced size?
+	orw	%ax, %ax
+	jz	mopar1
+
+	movb	%ah, %fs:(PARAM_VIDEO_COLS)
+	movb	%al, %fs:(PARAM_VIDEO_LINES)
+	ret
+
+mopar1:	movb	$25, %al
+	cmpb	$0, adapter			# If we are on CGA/MDA/HGA, the
+	jz	mopar2				# screen must have 25 lines.
+
+	movb	%gs:(0x484), %al		# On EGA/VGA, use the EGA+ BIOS
+	incb	%al				# location of max lines.
+mopar2: movb	%al, %fs:(PARAM_VIDEO_LINES)
+	ret
+
+#ifdef CONFIG_VIDEO_SELECT
+# Fetching of VESA frame buffer parameters
+mopar_gr:
+	leaw	modelist+1024, %di
+	movb	$0x23, %fs:(PARAM_HAVE_VGA)
+	movw	16(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_LINELENGTH)
+	movw	18(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_WIDTH)
+	movw	20(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_HEIGHT)
+	movb	25(%di), %al
+	movb	$0, %ah
+	movw	%ax, %fs:(PARAM_LFB_DEPTH)
+	movb	29(%di), %al	
+	movb	$0, %ah
+	movw	%ax, %fs:(PARAM_LFB_PAGES)
+	movl	40(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_BASE)
+	movl	31(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_COLORS)
+	movl	35(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_COLORS+4)
+	movw	0(%di), %ax
+	movw	%ax, %fs:(PARAM_VESA_ATTRIB)
+
+# get video mem size
+	leaw	modelist+1024, %di
+	movw	$0x4f00, %ax
+	int	$0x10
+	xorl	%eax, %eax
+	movw	18(%di), %ax
+	movl	%eax, %fs:(PARAM_LFB_SIZE)
+
+# switching the DAC to 8-bit is for <= 8 bpp only
+	movw	%fs:(PARAM_LFB_DEPTH), %ax
+	cmpw	$8, %ax
+	jg	dac_done
+
+# get DAC switching capability
+	xorl	%eax, %eax
+	movb	10(%di), %al
+	testb	$1, %al
+	jz	dac_set
+
+# attempt to switch DAC to 8-bit
+	movw	$0x4f08, %ax
+	movw	$0x0800, %bx
+	int	$0x10
+	cmpw	$0x004f, %ax
+	jne     dac_set
+	movb    %bh, dac_size		# store actual DAC size
+
+dac_set:
+# set color size to DAC size
+	movb	dac_size, %al
+	movb	%al, %fs:(PARAM_LFB_COLORS+0)
+	movb	%al, %fs:(PARAM_LFB_COLORS+2)
+	movb	%al, %fs:(PARAM_LFB_COLORS+4)
+	movb	%al, %fs:(PARAM_LFB_COLORS+6)
+
+# set color offsets to 0
+	movb	$0, %fs:(PARAM_LFB_COLORS+1)
+	movb	$0, %fs:(PARAM_LFB_COLORS+3)
+	movb	$0, %fs:(PARAM_LFB_COLORS+5)
+	movb	$0, %fs:(PARAM_LFB_COLORS+7)
+
+dac_done:
+# get protected mode interface informations
+	movw	$0x4f0a, %ax
+	xorw	%bx, %bx
+	xorw	%di, %di
+	int	$0x10
+	cmp	$0x004f, %ax
+	jnz	no_pm
+
+	movw	%es, %fs:(PARAM_VESAPM_SEG)
+	movw	%di, %fs:(PARAM_VESAPM_OFF)
+no_pm:	ret
+
+# The video mode menu
+mode_menu:
+	leaw	keymsg, %si			# "Return/Space/Timeout" message
+	call	prtstr
+	call	flush
+nokey:	call	getkt
+
+	cmpb	$0x0d, %al			# ENTER ?
+	je	listm				# yes - manual mode selection
+
+	cmpb	$0x20, %al			# SPACE ?
+	je	defmd1				# no - repeat
+
+	call 	beep
+	jmp	nokey
+
+defmd1:	ret					# No mode chosen? Default 80x25
+
+listm:	call	mode_table			# List mode table
+listm0:	leaw	name_bann, %si			# Print adapter name
+	call	prtstr
+	movw	card_name, %si
+	orw	%si, %si
+	jnz	an2
+
+	movb	adapter, %al
+	leaw	old_name, %si
+	orb	%al, %al
+	jz	an1
+
+	leaw	ega_name, %si
+	decb	%al
+	jz	an1
+
+	leaw	vga_name, %si
+	jmp	an1
+
+an2:	call	prtstr
+	leaw	svga_name, %si
+an1:	call	prtstr
+	leaw	listhdr, %si			# Table header
+	call	prtstr
+	movb	$0x30, %dl			# DL holds mode number
+	leaw	modelist, %si
+lm1:	cmpw	$ASK_VGA, (%si)			# End?
+	jz	lm2
+
+	movb	%dl, %al			# Menu selection number
+	call	prtchr
+	call	prtsp2
+	lodsw
+	call	prthw				# Mode ID
+	call	prtsp2
+	movb	0x1(%si), %al
+	call	prtdec				# Rows
+	movb	$0x78, %al			# the letter 'x'
+	call	prtchr
+	lodsw
+	call	prtdec				# Columns
+	movb	$0x0d, %al			# New line
+	call	prtchr
+	movb	$0x0a, %al
+	call	prtchr
+	incb	%dl				# Next character
+	cmpb	$0x3a, %dl
+	jnz	lm1
+
+	movb	$0x61, %dl
+	jmp	lm1
+
+lm2:	leaw	prompt, %si			# Mode prompt
+	call	prtstr
+	leaw	edit_buf, %di			# Editor buffer
+lm3:	call	getkey
+	cmpb	$0x0d, %al			# Enter?
+	jz	lment
+
+	cmpb	$0x08, %al			# Backspace?
+	jz	lmbs
+
+	cmpb	$0x20, %al			# Printable?
+	jc	lm3
+
+	cmpw	$edit_buf+4, %di		# Enough space?
+	jz	lm3
+
+	stosb
+	call	prtchr
+	jmp	lm3
+
+lmbs:	cmpw	$edit_buf, %di			# Backspace
+	jz	lm3
+
+	decw	%di
+	movb	$0x08, %al
+	call	prtchr
+	call	prtspc
+	movb	$0x08, %al
+	call	prtchr
+	jmp	lm3
+	
+lment:	movb	$0, (%di)
+	leaw	crlft, %si
+	call	prtstr
+	leaw	edit_buf, %si
+	cmpb	$0, (%si)			# Empty string = default mode
+	jz	lmdef
+
+	cmpb	$0, 1(%si)			# One character = menu selection
+	jz	mnusel
+
+	cmpw	$0x6373, (%si)			# "scan" => mode scanning
+	jnz	lmhx
+
+	cmpw	$0x6e61, 2(%si)
+	jz	lmscan
+
+lmhx:	xorw	%bx, %bx			# Else => mode ID in hex
+lmhex:	lodsb
+	orb	%al, %al
+	jz	lmuse1
+
+	subb	$0x30, %al
+	jc	lmbad
+
+	cmpb	$10, %al
+	jc	lmhx1
+
+	subb	$7, %al
+	andb	$0xdf, %al
+	cmpb	$10, %al
+	jc	lmbad
+
+	cmpb	$16, %al
+	jnc	lmbad
+
+lmhx1:	shlw	$4, %bx
+	orb	%al, %bl
+	jmp	lmhex
+
+lmuse1:	movw	%bx, %ax
+	jmp	lmuse
+
+mnusel:	lodsb					# Menu selection
+	xorb	%ah, %ah
+	subb	$0x30, %al
+	jc	lmbad
+
+	cmpb	$10, %al
+	jc	lmuse
+	
+	cmpb	$0x61-0x30, %al
+	jc	lmbad
+	
+	subb	$0x61-0x30-10, %al
+	cmpb	$36, %al
+	jnc	lmbad
+
+lmuse:	call	mode_set
+	jc	lmdef
+
+lmbad:	leaw	unknt, %si
+	call	prtstr
+	jmp	lm2
+lmscan:	cmpb	$0, adapter			# Scanning only on EGA/VGA
+	jz	lmbad
+
+	movw	$0, mt_end			# Scanning of modes is
+	movb	$1, scanning			# done as new autodetection.
+	call	mode_table
+	jmp	listm0
+lmdef:	ret
+
+# Additional parts of mode_set... (relative jumps, you know)
+setv7:						# Video7 extended modes
+	DO_STORE
+	subb	$VIDEO_FIRST_V7>>8, %bh
+	movw	$0x6f05, %ax
+	int	$0x10
+	stc
+	ret
+
+_setrec:	jmp	setrec			# Ugly...
+_set_80x25:	jmp	set_80x25
+
+# Aliases for backward compatibility.
+setalias:
+	movw	$VIDEO_80x25, %ax
+	incw	%bx
+	jz	mode_set
+
+	movb	$VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
+	incw	%bx
+	jnz	setbad				# Fall-through!
+
+# Setting of user mode (AX=mode ID) => CF=success
+mode_set:
+	movw	%ax, %fs:(0x01fa)		# Store mode for use in acpi_wakeup.S
+	movw	%ax, %bx
+	cmpb	$0xff, %ah
+	jz	setalias
+
+	testb	$VIDEO_RECALC>>8, %ah
+	jnz	_setrec
+
+	cmpb	$VIDEO_FIRST_RESOLUTION>>8, %ah
+	jnc	setres
+	
+	cmpb	$VIDEO_FIRST_SPECIAL>>8, %ah
+	jz	setspc
+	
+	cmpb	$VIDEO_FIRST_V7>>8, %ah
+	jz	setv7
+	
+	cmpb	$VIDEO_FIRST_VESA>>8, %ah
+	jnc	check_vesa
+	
+	orb	%ah, %ah
+	jz	setmenu
+	
+	decb	%ah
+	jz	setbios
+
+setbad:	clc
+	movb	$0, do_restore			# The screen needn't be restored
+	ret
+
+setvesa:
+	DO_STORE
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	movw	$0x4f02, %ax			# VESA BIOS mode set call
+	int	$0x10
+	cmpw	$0x004f, %ax			# AL=4f if implemented
+	jnz	setbad				# AH=0 if OK
+
+	stc
+	ret
+
+setbios:
+	DO_STORE
+	int	$0x10				# Standard BIOS mode set call
+	pushw	%bx
+	movb	$0x0f, %ah			# Check if really set
+	int	$0x10
+	popw	%bx
+	cmpb	%bl, %al
+	jnz	setbad
+	
+	stc
+	ret
+
+setspc:	xorb	%bh, %bh			# Set special mode
+	cmpb	$VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
+	jnc	setbad
+	
+	addw	%bx, %bx
+	jmp	*spec_inits(%bx)
+
+setmenu:
+	orb	%al, %al			# 80x25 is an exception
+	jz	_set_80x25
+	
+	pushw	%bx				# Set mode chosen from menu
+	call	mode_table			# Build the mode table
+	popw	%ax
+	shlw	$2, %ax
+	addw	%ax, %si
+	cmpw	%di, %si
+	jnc	setbad
+	
+	movw	(%si), %ax			# Fetch mode ID
+_m_s:	jmp	mode_set
+
+setres:	pushw	%bx				# Set mode chosen by resolution
+	call	mode_table
+	popw	%bx
+	xchgb	%bl, %bh
+setr1:	lodsw
+	cmpw	$ASK_VGA, %ax			# End of the list?
+	jz	setbad
+	
+	lodsw
+	cmpw	%bx, %ax
+	jnz	setr1
+	
+	movw	-4(%si), %ax			# Fetch mode ID
+	jmp	_m_s
+
+check_vesa:
+	leaw	modelist+1024, %di
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	movw	%bx, %cx			# Get mode information structure
+	movw	$0x4f01, %ax
+	int	$0x10
+	addb	$VIDEO_FIRST_VESA>>8, %bh
+	cmpw	$0x004f, %ax
+	jnz	setbad
+
+	movb	(%di), %al			# Check capabilities.
+	andb	$0x19, %al
+	cmpb	$0x09, %al
+	jz	setvesa				# This is a text mode
+
+	movb	(%di), %al			# Check capabilities.
+	andb	$0x99, %al
+	cmpb	$0x99, %al
+	jnz	_setbad				# Doh! No linear frame buffer.
+
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	orw	$0x4000, %bx			# Use linear frame buffer
+	movw	$0x4f02, %ax			# VESA BIOS mode set call
+	int	$0x10
+	cmpw	$0x004f, %ax			# AL=4f if implemented
+	jnz	_setbad				# AH=0 if OK
+
+	movb	$1, graphic_mode		# flag graphic mode
+	movb	$0, do_restore			# no screen restore
+	stc
+	ret
+
+_setbad:	jmp	setbad          	# Ugly...
+
+# Recalculate vertical display end registers -- this fixes various
+# inconsistencies of extended modes on many adapters. Called when
+# the VIDEO_RECALC flag is set in the mode ID.
+
+setrec:	subb	$VIDEO_RECALC>>8, %ah		# Set the base mode
+	call	mode_set
+	jnc	rct3
+
+	movw	%gs:(0x485), %ax		# Font size in pixels
+	movb	%gs:(0x484), %bl		# Number of rows
+	incb	%bl
+	mulb	%bl				# Number of visible
+	decw	%ax				# scan lines - 1
+	movw	$0x3d4, %dx
+	movw	%ax, %bx
+	movb	$0x12, %al			# Lower 8 bits
+	movb	%bl, %ah
+	outw	%ax, %dx
+	movb	$0x07, %al		# Bits 8 and 9 in the overflow register
+	call	inidx
+	xchgb	%al, %ah
+	andb	$0xbd, %ah
+	shrb	%bh
+	jnc	rct1
+	orb	$0x02, %ah
+rct1:	shrb	%bh
+	jnc	rct2
+	orb	$0x40, %ah
+rct2:	movb	$0x07, %al
+	outw	%ax, %dx
+	stc
+rct3:	ret
+
+# Table of routines for setting of the special modes.
+spec_inits:
+	.word	set_80x25
+	.word	set_8pixel
+	.word	set_80x43
+	.word	set_80x28
+	.word	set_current
+	.word	set_80x30
+	.word	set_80x34
+	.word	set_80x60
+	.word	set_gfx
+
+# Set the 80x25 mode. If already set, do nothing.
+set_80x25:
+	movw	$0x5019, force_size		# Override possibly broken BIOS
+use_80x25:
+#ifdef CONFIG_VIDEO_400_HACK
+	movw	$0x1202, %ax			# Force 400 scan lines
+	movb	$0x30, %bl
+	int	$0x10
+#else
+	movb	$0x0f, %ah			# Get current mode ID
+	int	$0x10
+	cmpw	$0x5007, %ax	# Mode 7 (80x25 mono) is the only one available
+	jz	st80		# on CGA/MDA/HGA and is also available on EGAM
+
+	cmpw	$0x5003, %ax	# Unknown mode, force 80x25 color
+	jnz	force3
+
+st80:	cmpb	$0, adapter	# CGA/MDA/HGA => mode 3/7 is always 80x25
+	jz	set80
+
+	movb	%gs:(0x0484), %al	# This is EGA+ -- beware of 80x50 etc.
+	orb	%al, %al		# Some buggy BIOS'es set 0 rows
+	jz	set80
+	
+	cmpb	$24, %al		# It's hopefully correct
+	jz	set80
+#endif /* CONFIG_VIDEO_400_HACK */
+force3:	DO_STORE
+	movw	$0x0003, %ax			# Forced set
+	int	$0x10
+set80:	stc
+	ret
+
+# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
+set_8pixel:
+	DO_STORE
+	call	use_80x25			# The base is 80x25
+set_8pt:
+	movw	$0x1112, %ax			# Use 8x8 font
+	xorb	%bl, %bl
+	int	$0x10
+	movw	$0x1200, %ax			# Use alternate print screen
+	movb	$0x20, %bl
+	int	$0x10
+	movw	$0x1201, %ax			# Turn off cursor emulation
+	movb	$0x34, %bl
+	int	$0x10
+	movb	$0x01, %ah			# Define cursor scan lines 6-7
+	movw	$0x0607, %cx
+	int	$0x10
+set_current:
+	stc
+	ret
+
+# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
+# 80x25 mode with 14-point fonts instead of 16-point.
+set_80x28:
+	DO_STORE
+	call	use_80x25			# The base is 80x25
+set14:	movw	$0x1111, %ax			# Use 9x14 font
+	xorb	%bl, %bl
+	int	$0x10
+	movb	$0x01, %ah			# Define cursor scan lines 11-12
+	movw	$0x0b0c, %cx
+	int	$0x10
+	stc
+	ret
+
+# Set the 80x43 mode. This mode is works on all VGA's.
+# It's a 350-scanline mode with 8-pixel font.
+set_80x43:
+	DO_STORE
+	movw	$0x1201, %ax			# Set 350 scans
+	movb	$0x30, %bl
+	int	$0x10
+	movw	$0x0003, %ax			# Reset video mode
+	int	$0x10
+	jmp	set_8pt				# Use 8-pixel font
+
+# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
+set_80x30:
+	call	use_80x25			# Start with real 80x25
+	DO_STORE
+	movw	$0x3cc, %dx			# Get CRTC port
+	inb	%dx, %al
+	movb	$0xd4, %dl
+	rorb	%al				# Mono or color?
+	jc	set48a
+
+	movb	$0xb4, %dl
+set48a:	movw	$0x0c11, %ax		# Vertical sync end (also unlocks CR0-7)
+ 	call	outidx
+	movw	$0x0b06, %ax			# Vertical total
+ 	call	outidx
+	movw	$0x3e07, %ax			# (Vertical) overflow
+ 	call	outidx
+	movw	$0xea10, %ax			# Vertical sync start
+ 	call	outidx
+	movw	$0xdf12, %ax			# Vertical display end
+	call	outidx
+	movw	$0xe715, %ax			# Vertical blank start
+ 	call	outidx
+	movw	$0x0416, %ax			# Vertical blank end
+ 	call	outidx
+	pushw	%dx
+	movb	$0xcc, %dl			# Misc output register (read)
+ 	inb	%dx, %al
+ 	movb	$0xc2, %dl			# (write)
+ 	andb	$0x0d, %al	# Preserve clock select bits and color bit
+ 	orb	$0xe2, %al			# Set correct sync polarity
+ 	outb	%al, %dx
+	popw	%dx
+	movw	$0x501e, force_size
+	stc					# That's all.
+	ret
+
+# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
+set_80x34:
+	call	set_80x30			# Set 480 scans
+	call	set14				# And 14-pt font
+	movw	$0xdb12, %ax			# VGA vertical display end
+	movw	$0x5022, force_size
+setvde:	call	outidx
+	stc
+	ret
+
+# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
+set_80x60:
+	call	set_80x30			# Set 480 scans
+	call	set_8pt				# And 8-pt font
+	movw	$0xdf12, %ax			# VGA vertical display end
+	movw	$0x503c, force_size
+	jmp	setvde
+
+# Special hack for ThinkPad graphics
+set_gfx:
+#ifdef CONFIG_VIDEO_GFX_HACK
+	movw	$VIDEO_GFX_BIOS_AX, %ax
+	movw	$VIDEO_GFX_BIOS_BX, %bx
+	int	$0x10
+	movw	$VIDEO_GFX_DUMMY_RESOLUTION, force_size
+	stc
+#endif
+	ret
+
+#ifdef CONFIG_VIDEO_RETAIN
+
+# Store screen contents to temporary buffer.
+store_screen:
+	cmpb	$0, do_restore			# Already stored?
+	jnz	stsr
+
+	testb	$CAN_USE_HEAP, loadflags	# Have we space for storing?
+	jz	stsr
+	
+	pushw	%ax
+	pushw	%bx
+	pushw	force_size			# Don't force specific size
+	movw	$0, force_size
+	call	mode_params			# Obtain params of current mode
+	popw	force_size
+	movb	%fs:(PARAM_VIDEO_LINES), %ah
+	movb	%fs:(PARAM_VIDEO_COLS), %al
+	movw	%ax, %bx			# BX=dimensions
+	mulb	%ah
+	movw	%ax, %cx			# CX=number of characters
+	addw	%ax, %ax			# Calculate image size
+	addw	$modelist+1024+4, %ax
+	cmpw	heap_end_ptr, %ax
+	jnc	sts1				# Unfortunately, out of memory
+
+	movw	%fs:(PARAM_CURSOR_POS), %ax	# Store mode params
+	leaw	modelist+1024, %di
+	stosw
+	movw	%bx, %ax
+	stosw
+	pushw	%ds				# Store the screen
+	movw	video_segment, %ds
+	xorw	%si, %si
+	rep
+	movsw
+	popw	%ds
+	incb	do_restore			# Screen will be restored later
+sts1:	popw	%bx
+	popw	%ax
+stsr:	ret
+
+# Restore screen contents from temporary buffer.
+restore_screen:
+	cmpb	$0, do_restore			# Has the screen been stored?
+	jz	res1
+
+	call	mode_params			# Get parameters of current mode
+	movb	%fs:(PARAM_VIDEO_LINES), %cl
+	movb	%fs:(PARAM_VIDEO_COLS), %ch
+	leaw	modelist+1024, %si		# Screen buffer
+	lodsw					# Set cursor position
+	movw	%ax, %dx
+	cmpb	%cl, %dh
+	jc	res2
+	
+	movb	%cl, %dh
+	decb	%dh
+res2:	cmpb	%ch, %dl
+	jc	res3
+	
+	movb	%ch, %dl
+	decb	%dl
+res3:	movb	$0x02, %ah
+	movb	$0x00, %bh
+	int	$0x10
+	lodsw					# Display size
+	movb	%ah, %dl			# DL=number of lines
+	movb	$0, %ah				# BX=phys. length of orig. line
+	movw	%ax, %bx
+	cmpb	%cl, %dl			# Too many?
+	jc	res4
+
+	pushw	%ax
+	movb	%dl, %al
+	subb	%cl, %al
+	mulb	%bl
+	addw	%ax, %si
+	addw	%ax, %si
+	popw	%ax
+	movb	%cl, %dl
+res4:	cmpb	%ch, %al			# Too wide?
+	jc	res5
+	
+	movb	%ch, %al			# AX=width of src. line
+res5:	movb	$0, %cl
+	xchgb	%ch, %cl
+	movw	%cx, %bp			# BP=width of dest. line
+	pushw	%es
+	movw	video_segment, %es
+	xorw	%di, %di			# Move the data
+	addw	%bx, %bx			# Convert BX and BP to _bytes_
+	addw	%bp, %bp
+res6:	pushw	%si
+	pushw	%di
+	movw	%ax, %cx
+	rep
+	movsw
+	popw	%di
+	popw	%si
+	addw	%bp, %di
+	addw	%bx, %si
+	decb	%dl
+	jnz	res6
+	
+	popw	%es				# Done
+res1:	ret
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
+outidx:	outb	%al, %dx
+	pushw	%ax
+	movb	%ah, %al
+	incw	%dx
+	outb	%al, %dx
+	decw	%dx
+	popw	%ax
+	ret
+
+# Build the table of video modes (stored after the setup.S code at the
+# `modelist' label. Each video mode record looks like:
+#	.word	MODE-ID		(our special mode ID (see above))
+#	.byte	rows		(number of rows)
+#	.byte	columns		(number of columns)
+# Returns address of the end of the table in DI, the end is marked
+# with a ASK_VGA ID.
+mode_table:
+	movw	mt_end, %di			# Already filled?
+	orw	%di, %di
+	jnz	mtab1x
+	
+	leaw	modelist, %di			# Store standard modes:
+	movl	$VIDEO_80x25 + 0x50190000, %eax	# The 80x25 mode (ALL)
+	stosl
+	movb	adapter, %al			# CGA/MDA/HGA -- no more modes
+	orb	%al, %al
+	jz	mtabe
+	
+	decb	%al
+	jnz	mtabv
+	
+	movl	$VIDEO_8POINT + 0x502b0000, %eax	# The 80x43 EGA mode
+	stosl
+	jmp	mtabe
+
+mtab1x:	jmp	mtab1
+
+mtabv:	leaw	vga_modes, %si			# All modes for std VGA
+	movw	$vga_modes_end-vga_modes, %cx
+	rep	# I'm unable to use movsw as I don't know how to store a half
+	movsb	# of the expression above to cx without using explicit shr.
+
+	cmpb	$0, scanning			# Mode scan requested?
+	jz	mscan1
+	
+	call	mode_scan
+mscan1:
+
+#ifdef CONFIG_VIDEO_LOCAL
+	call	local_modes
+#endif /* CONFIG_VIDEO_LOCAL */
+
+#ifdef CONFIG_VIDEO_VESA
+	call	vesa_modes			# Detect VESA VGA modes
+#endif /* CONFIG_VIDEO_VESA */
+
+#ifdef CONFIG_VIDEO_SVGA
+	cmpb	$0, scanning			# Bypass when scanning
+	jnz	mscan2
+	
+	call	svga_modes			# Detect SVGA cards & modes
+mscan2:
+#endif /* CONFIG_VIDEO_SVGA */
+
+mtabe:
+
+#ifdef CONFIG_VIDEO_COMPACT
+	leaw	modelist, %si
+	movw	%di, %dx
+	movw	%si, %di
+cmt1:	cmpw	%dx, %si			# Scan all modes
+	jz	cmt2
+
+	leaw	modelist, %bx			# Find in previous entries
+	movw	2(%si), %cx
+cmt3:	cmpw	%bx, %si
+	jz	cmt4
+
+	cmpw	2(%bx), %cx			# Found => don't copy this entry
+	jz	cmt5
+
+	addw	$4, %bx
+	jmp	cmt3
+
+cmt4:	movsl					# Copy entry
+	jmp	cmt1
+
+cmt5:	addw	$4, %si				# Skip entry
+	jmp	cmt1
+
+cmt2:
+#endif	/* CONFIG_VIDEO_COMPACT */
+
+	movw	$ASK_VGA, (%di)			# End marker
+	movw	%di, mt_end
+mtab1:	leaw	modelist, %si			# SI=mode list, DI=list end
+ret0:	ret
+
+# Modes usable on all standard VGAs
+vga_modes:
+	.word	VIDEO_8POINT
+	.word	0x5032				# 80x50
+	.word	VIDEO_80x43
+	.word	0x502b				# 80x43
+	.word	VIDEO_80x28
+	.word	0x501c				# 80x28
+	.word	VIDEO_80x30
+	.word	0x501e				# 80x30
+	.word	VIDEO_80x34
+	.word	0x5022				# 80x34
+	.word	VIDEO_80x60
+	.word	0x503c				# 80x60
+#ifdef CONFIG_VIDEO_GFX_HACK
+	.word	VIDEO_GFX_HACK
+	.word	VIDEO_GFX_DUMMY_RESOLUTION
+#endif
+
+vga_modes_end:
+# Detect VESA modes.
+
+#ifdef CONFIG_VIDEO_VESA
+vesa_modes:
+	cmpb	$2, adapter			# VGA only
+	jnz	ret0
+
+	movw	%di, %bp			# BP=original mode table end
+	addw	$0x200, %di			# Buffer space
+	movw	$0x4f00, %ax			# VESA Get card info call
+	int	$0x10
+	movw	%bp, %di
+	cmpw	$0x004f, %ax			# Successful?
+	jnz	ret0
+	
+	cmpw	$0x4556, 0x200(%di)
+	jnz	ret0
+	
+	cmpw	$0x4153, 0x202(%di)
+	jnz	ret0
+	
+	movw	$vesa_name, card_name		# Set name to "VESA VGA"
+	pushw	%gs
+	lgsw	0x20e(%di), %si			# GS:SI=mode list
+	movw	$128, %cx			# Iteration limit
+vesa1:
+# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
+# XXX:	lodsw	%gs:(%si), %ax			# Get next mode in the list
+	gs; lodsw
+	cmpw	$0xffff, %ax			# End of the table?
+	jz	vesar
+	
+	cmpw	$0x0080, %ax			# Check validity of mode ID
+	jc	vesa2
+	
+	orb	%ah, %ah		# Valid IDs: 0x0000-0x007f/0x0100-0x07ff
+	jz	vesan			# Certain BIOSes report 0x80-0xff!
+
+	cmpw	$0x0800, %ax
+	jnc	vesae
+
+vesa2:	pushw	%cx
+	movw	%ax, %cx			# Get mode information structure
+	movw	$0x4f01, %ax
+	int	$0x10
+	movw	%cx, %bx			# BX=mode number
+	addb	$VIDEO_FIRST_VESA>>8, %bh
+	popw	%cx
+	cmpw	$0x004f, %ax
+	jnz	vesan			# Don't report errors (buggy BIOSES)
+
+	movb	(%di), %al			# Check capabilities. We require
+	andb	$0x19, %al			# a color text mode.
+	cmpb	$0x09, %al
+	jnz	vesan
+	
+	cmpw	$0xb800, 8(%di)		# Standard video memory address required
+	jnz	vesan
+
+	testb	$2, (%di)			# Mode characteristics supplied?
+	movw	%bx, (%di)			# Store mode number
+	jz	vesa3
+	
+	xorw	%dx, %dx
+	movw	0x12(%di), %bx			# Width
+	orb	%bh, %bh
+	jnz	vesan
+	
+	movb	%bl, 0x3(%di)
+	movw	0x14(%di), %ax			# Height
+	orb	%ah, %ah
+	jnz	vesan
+	
+	movb	%al, 2(%di)
+	mulb	%bl
+	cmpw	$8193, %ax		# Small enough for Linux console driver?
+	jnc	vesan
+
+	jmp	vesaok
+
+vesa3:	subw	$0x8108, %bx	# This mode has no detailed info specified,
+	jc	vesan		# so it must be a standard VESA mode.
+
+	cmpw	$5, %bx
+	jnc	vesan
+
+	movw	vesa_text_mode_table(%bx), %ax
+	movw	%ax, 2(%di)
+vesaok:	addw	$4, %di				# The mode is valid. Store it.
+vesan:	loop	vesa1			# Next mode. Limit exceeded => error
+vesae:	leaw	vesaer, %si
+	call	prtstr
+	movw	%bp, %di			# Discard already found modes.
+vesar:	popw	%gs
+	ret
+
+# Dimensions of standard VESA text modes
+vesa_text_mode_table:
+	.byte	60, 80				# 0108
+	.byte	25, 132				# 0109
+	.byte	43, 132				# 010A
+	.byte	50, 132				# 010B
+	.byte	60, 132				# 010C
+#endif	/* CONFIG_VIDEO_VESA */
+
+# Scan for video modes. A bit dirty, but should work.
+mode_scan:
+	movw	$0x0100, %cx			# Start with mode 0
+scm1:	movb	$0, %ah				# Test the mode
+	movb	%cl, %al
+	int	$0x10
+	movb	$0x0f, %ah
+	int	$0x10
+	cmpb	%cl, %al
+	jnz	scm2				# Mode not set
+
+	movw	$0x3c0, %dx			# Test if it's a text mode
+	movb	$0x10, %al			# Mode bits
+	call	inidx
+	andb	$0x03, %al
+	jnz	scm2
+	
+	movb	$0xce, %dl			# Another set of mode bits
+	movb	$0x06, %al
+	call	inidx
+	shrb	%al
+	jc	scm2
+	
+	movb	$0xd4, %dl			# Cursor location
+	movb	$0x0f, %al
+	call	inidx
+	orb	%al, %al
+	jnz	scm2
+	
+	movw	%cx, %ax			# Ok, store the mode
+	stosw
+	movb	%gs:(0x484), %al		# Number of rows
+	incb	%al
+	stosb
+	movw	%gs:(0x44a), %ax		# Number of columns
+	stosb
+scm2:	incb	%cl
+	jns	scm1
+	
+	movw	$0x0003, %ax			# Return back to mode 3
+	int	$0x10
+	ret
+
+tstidx:	outw	%ax, %dx			# OUT DX,AX and inidx
+inidx:	outb	%al, %dx			# Read from indexed VGA register
+	incw	%dx			# AL=index, DX=index reg port -> AL=data
+	inb	%dx, %al
+	decw	%dx
+	ret
+
+# Try to detect type of SVGA card and supply (usually approximate) video
+# mode table for it.
+
+#ifdef CONFIG_VIDEO_SVGA
+svga_modes:
+	leaw	svga_table, %si			# Test all known SVGA adapters
+dosvga:	lodsw
+	movw	%ax, %bp			# Default mode table
+	orw	%ax, %ax
+	jz	didsv1
+
+	lodsw					# Pointer to test routine
+	pushw	%si
+	pushw	%di
+	pushw	%es
+	movw	$0xc000, %bx
+	movw	%bx, %es
+	call	*%ax				# Call test routine
+	popw	%es
+	popw	%di
+	popw	%si
+	orw	%bp, %bp
+	jz	dosvga
+	
+	movw	%bp, %si			# Found, copy the modes
+	movb	svga_prefix, %ah
+cpsvga:	lodsb
+	orb	%al, %al
+	jz	didsv
+	
+	stosw
+	movsw
+	jmp	cpsvga
+
+didsv:	movw	%si, card_name			# Store pointer to card name
+didsv1:	ret
+
+# Table of all known SVGA cards. For each card, we store a pointer to
+# a table of video modes supported by the card and a pointer to a routine
+# used for testing of presence of the card. The video mode table is always
+# followed by the name of the card or the chipset.
+svga_table:
+	.word	ati_md, ati_test
+	.word	oak_md, oak_test
+	.word	paradise_md, paradise_test
+	.word	realtek_md, realtek_test
+	.word	s3_md, s3_test
+	.word	chips_md, chips_test
+	.word	video7_md, video7_test
+	.word	cirrus5_md, cirrus5_test
+	.word	cirrus6_md, cirrus6_test
+	.word	cirrus1_md, cirrus1_test
+	.word	ahead_md, ahead_test
+	.word	everex_md, everex_test
+	.word	genoa_md, genoa_test
+	.word	trident_md, trident_test
+	.word	tseng_md, tseng_test
+	.word	0
+
+# Test routines and mode tables:
+
+# S3 - The test algorithm was taken from the SuperProbe package
+# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
+s3_test:
+	movw	$0x0f35, %cx	# we store some constants in cl/ch
+	movw	$0x03d4, %dx
+	movb	$0x38, %al
+	call	inidx
+	movb	%al, %bh	# store current CRT-register 0x38
+	movw	$0x0038, %ax
+	call	outidx		# disable writing to special regs
+	movb	%cl, %al	# check whether we can write special reg 0x35
+	call	inidx
+	movb	%al, %bl	# save the current value of CRT reg 0x35
+	andb	$0xf0, %al	# clear bits 0-3
+	movb	%al, %ah
+	movb	%cl, %al	# and write it to CRT reg 0x35
+	call	outidx
+	call	inidx		# now read it back
+	andb	%ch, %al	# clear the upper 4 bits
+	jz	s3_2		# the first test failed. But we have a
+
+	movb	%bl, %ah	# second chance
+	movb	%cl, %al
+	call	outidx
+	jmp	s3_1		# do the other tests
+
+s3_2:	movw	%cx, %ax	# load ah with 0xf and al with 0x35
+	orb	%bl, %ah	# set the upper 4 bits of ah with the orig value
+	call	outidx		# write ...
+	call	inidx		# ... and reread 
+	andb	%cl, %al	# turn off the upper 4 bits
+	pushw	%ax
+	movb	%bl, %ah	# restore old value in register 0x35
+	movb	%cl, %al
+	call	outidx
+	popw	%ax
+	cmpb	%ch, %al	# setting lower 4 bits was successful => bad
+	je	no_s3		# writing is allowed => this is not an S3
+
+s3_1:	movw	$0x4838, %ax	# allow writing to special regs by putting
+	call	outidx		# magic number into CRT-register 0x38
+	movb	%cl, %al	# check whether we can write special reg 0x35
+	call	inidx
+	movb	%al, %bl
+	andb	$0xf0, %al
+	movb	%al, %ah
+	movb	%cl, %al
+	call	outidx
+	call	inidx
+	andb	%ch, %al
+	jnz	no_s3		# no, we can't write => no S3
+
+	movw	%cx, %ax
+	orb	%bl, %ah
+	call	outidx
+	call	inidx
+	andb	%ch, %al
+	pushw	%ax
+	movb	%bl, %ah	# restore old value in register 0x35
+	movb	%cl, %al
+	call	outidx
+	popw	%ax
+	cmpb	%ch, %al
+	jne	no_s31		# writing not possible => no S3
+	movb	$0x30, %al
+	call	inidx		# now get the S3 id ...
+	leaw	idS3, %di
+	movw	$0x10, %cx
+	repne
+	scasb
+	je	no_s31
+
+	movb	%bh, %ah
+	movb	$0x38, %al
+	jmp	s3rest
+
+no_s3:	movb	$0x35, %al	# restore CRT register 0x35
+	movb	%bl, %ah
+	call	outidx
+no_s31:	xorw	%bp, %bp	# Detection failed
+s3rest:	movb	%bh, %ah
+	movb	$0x38, %al	# restore old value of CRT register 0x38
+	jmp	outidx
+
+idS3:	.byte	0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
+	.byte	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
+
+s3_md:	.byte	0x54, 0x2b, 0x84
+	.byte	0x55, 0x19, 0x84
+	.byte	0
+	.ascii	"S3"
+	.byte	0
+
+# ATI cards.
+ati_test:
+	leaw 	idati, %si
+	movw	$0x31, %di
+	movw	$0x09, %cx
+	repe
+	cmpsb
+	je	atiok
+
+	xorw	%bp, %bp
+atiok:	ret
+
+idati:	.ascii	"761295520"
+
+ati_md:	.byte	0x23, 0x19, 0x84
+	.byte	0x33, 0x2c, 0x84
+	.byte	0x22, 0x1e, 0x64
+	.byte	0x21, 0x19, 0x64
+	.byte	0x58, 0x21, 0x50
+	.byte	0x5b, 0x1e, 0x50
+	.byte	0
+	.ascii	"ATI"
+	.byte	0
+
+# AHEAD
+ahead_test:
+	movw	$0x200f, %ax
+	movw	$0x3ce, %dx
+	outw	%ax, %dx
+	incw	%dx
+	inb	%dx, %al
+	cmpb	$0x20, %al
+	je	isahed
+
+	cmpb	$0x21, %al
+	je	isahed
+	
+	xorw	%bp, %bp
+isahed:	ret
+
+ahead_md:
+	.byte	0x22, 0x2c, 0x84
+	.byte	0x23, 0x19, 0x84
+	.byte	0x24, 0x1c, 0x84
+	.byte	0x2f, 0x32, 0xa0
+	.byte	0x32, 0x22, 0x50
+	.byte	0x34, 0x42, 0x50
+	.byte	0
+	.ascii	"Ahead"
+	.byte	0
+
+# Chips & Tech.
+chips_test:
+	movw	$0x3c3, %dx
+	inb	%dx, %al
+	orb	$0x10, %al
+	outb	%al, %dx
+	movw	$0x104, %dx
+	inb	%dx, %al
+	movb	%al, %bl
+	movw	$0x3c3, %dx
+	inb	%dx, %al
+	andb	$0xef, %al
+	outb	%al, %dx
+	cmpb	$0xa5, %bl
+	je	cantok
+	
+	xorw	%bp, %bp
+cantok:	ret
+
+chips_md:
+	.byte	0x60, 0x19, 0x84
+	.byte	0x61, 0x32, 0x84
+	.byte	0
+	.ascii	"Chips & Technologies"
+	.byte	0
+
+# Cirrus Logic 5X0
+cirrus1_test:
+	movw	$0x3d4, %dx
+	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bl
+	xorb	%al, %al
+	outb	%al, %dx
+	decw	%dx
+	movb	$0x1f, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bh
+	xorb	%ah, %ah
+	shlb	$4, %al
+	movw	%ax, %cx
+	movb	%bh, %al
+	shrb	$4, %al
+	addw	%ax, %cx
+	shlw	$8, %cx
+	addw	$6, %cx
+	movw	%cx, %ax
+	movw	$0x3c4, %dx
+	outw	%ax, %dx
+	incw	%dx
+	inb	%dx, %al
+	andb	%al, %al
+	jnz	nocirr
+	
+	movb	%bh, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	cmpb	$0x01, %al
+	je	iscirr
+
+nocirr:	xorw	%bp, %bp
+iscirr: movw	$0x3d4, %dx
+	movb	%bl, %al
+	xorb	%ah, %ah
+	shlw	$8, %ax
+	addw	$0x0c, %ax
+	outw	%ax, %dx
+	ret
+
+cirrus1_md:
+	.byte	0x1f, 0x19, 0x84
+	.byte	0x20, 0x2c, 0x84
+	.byte	0x22, 0x1e, 0x84
+	.byte	0x31, 0x25, 0x64
+	.byte	0
+	.ascii	"Cirrus Logic 5X0"
+	.byte	0
+
+# Cirrus Logic 54XX
+cirrus5_test:
+	movw	$0x3c4, %dx
+	movb	$6, %al
+	call	inidx
+	movb	%al, %bl			# BL=backup
+	movw	$6, %ax
+	call	tstidx
+	cmpb	$0x0f, %al
+	jne	c5fail
+	
+	movw	$0x1206, %ax
+	call	tstidx
+	cmpb	$0x12, %al
+	jne	c5fail
+	
+	movb	$0x1e, %al
+	call	inidx
+	movb	%al, %bh
+	movb	%bh, %ah
+	andb	$0xc0, %ah
+	movb	$0x1e, %al
+	call	tstidx
+	andb	$0x3f, %al
+	jne	c5xx
+	
+	movb	$0x1e, %al
+	movb	%bh, %ah
+	orb	$0x3f, %ah
+	call	tstidx
+	xorb	$0x3f, %al
+	andb	$0x3f, %al
+c5xx:	pushf
+	movb	$0x1e, %al
+	movb	%bh, %ah
+	outw	%ax, %dx
+	popf
+	je	c5done
+
+c5fail:	xorw	%bp, %bp
+c5done:	movb	$6, %al
+	movb	%bl, %ah
+	outw	%ax, %dx
+	ret
+
+cirrus5_md:
+	.byte	0x14, 0x19, 0x84
+	.byte	0x54, 0x2b, 0x84
+	.byte	0
+	.ascii	"Cirrus Logic 54XX"
+	.byte	0
+
+# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
+# it's misidentified by the Ahead test.
+cirrus6_test:
+	movw	$0x3ce, %dx
+	movb	$0x0a, %al
+	call	inidx
+	movb	%al, %bl	# BL=backup
+	movw	$0xce0a, %ax
+	call	tstidx
+	orb	%al, %al
+	jne	c2fail
+	
+	movw	$0xec0a, %ax
+	call	tstidx
+	cmpb	$0x01, %al
+	jne	c2fail
+	
+	movb	$0xaa, %al
+	call	inidx		# 4X, 5X, 7X and 8X are valid 64XX chip ID's. 
+	shrb	$4, %al
+	subb	$4, %al
+	jz	c6done
+	
+	decb	%al
+	jz	c6done
+	
+	subb	$2, %al
+	jz	c6done
+	
+	decb	%al
+	jz	c6done
+	
+c2fail:	xorw	%bp, %bp
+c6done:	movb	$0x0a, %al
+	movb	%bl, %ah
+	outw	%ax, %dx
+	ret
+
+cirrus6_md:
+	.byte	0
+	.ascii	"Cirrus Logic 64XX"
+	.byte	0
+
+# Everex / Trident
+everex_test:
+	movw	$0x7000, %ax
+	xorw	%bx, %bx
+	int	$0x10
+	cmpb	$0x70, %al
+	jne	noevrx
+	
+	shrw	$4, %dx
+	cmpw	$0x678, %dx
+	je	evtrid
+	
+	cmpw	$0x236, %dx
+	jne	evrxok
+
+evtrid:	leaw	trident_md, %bp
+evrxok:	ret
+
+noevrx:	xorw	%bp, %bp
+	ret
+
+everex_md:
+	.byte	0x03, 0x22, 0x50
+	.byte	0x04, 0x3c, 0x50
+	.byte	0x07, 0x2b, 0x64
+	.byte	0x08, 0x4b, 0x64
+	.byte	0x0a, 0x19, 0x84
+	.byte	0x0b, 0x2c, 0x84
+	.byte	0x16, 0x1e, 0x50
+	.byte	0x18, 0x1b, 0x64
+	.byte	0x21, 0x40, 0xa0
+	.byte	0x40, 0x1e, 0x84
+	.byte	0
+	.ascii	"Everex/Trident"
+	.byte	0
+
+# Genoa.
+genoa_test:
+	leaw	idgenoa, %si			# Check Genoa 'clues'
+	xorw	%ax, %ax
+	movb	%es:(0x37), %al
+	movw	%ax, %di
+	movw	$0x04, %cx
+	decw	%si
+	decw	%di
+l1:	incw	%si
+	incw	%di
+	movb	(%si), %al
+	testb	%al, %al
+	jz	l2
+
+	cmpb	%es:(%di), %al
+l2:	loope 	l1
+	orw	%cx, %cx
+	je	isgen
+	
+	xorw	%bp, %bp
+isgen:	ret
+
+idgenoa: .byte	0x77, 0x00, 0x99, 0x66
+
+genoa_md:
+	.byte	0x58, 0x20, 0x50
+	.byte	0x5a, 0x2a, 0x64
+	.byte	0x60, 0x19, 0x84
+	.byte	0x61, 0x1d, 0x84
+	.byte	0x62, 0x20, 0x84
+	.byte	0x63, 0x2c, 0x84
+	.byte	0x64, 0x3c, 0x84
+	.byte	0x6b, 0x4f, 0x64
+	.byte	0x72, 0x3c, 0x50
+	.byte	0x74, 0x42, 0x50
+	.byte	0x78, 0x4b, 0x64
+	.byte	0
+	.ascii	"Genoa"
+	.byte	0
+
+# OAK
+oak_test:
+	leaw	idoakvga, %si
+	movw	$0x08, %di
+	movw	$0x08, %cx
+	repe
+	cmpsb
+	je	isoak
+	
+	xorw	%bp, %bp
+isoak:	ret
+
+idoakvga: .ascii  "OAK VGA "
+
+oak_md: .byte	0x4e, 0x3c, 0x50
+	.byte	0x4f, 0x3c, 0x84
+	.byte	0x50, 0x19, 0x84
+	.byte	0x51, 0x2b, 0x84
+	.byte	0
+	.ascii	"OAK"
+	.byte	0
+
+# WD Paradise.
+paradise_test:
+	leaw	idparadise, %si
+	movw	$0x7d, %di
+	movw	$0x04, %cx
+	repe
+	cmpsb
+	je	ispara
+	
+	xorw	%bp, %bp
+ispara:	ret
+
+idparadise:	.ascii	"VGA="
+
+paradise_md:
+	.byte	0x41, 0x22, 0x50
+	.byte	0x47, 0x1c, 0x84
+	.byte	0x55, 0x19, 0x84
+	.byte	0x54, 0x2c, 0x84
+	.byte	0
+	.ascii	"Paradise"
+	.byte	0
+
+# Trident.
+trident_test:
+	movw	$0x3c4, %dx
+	movb	$0x0e, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	xchgb	%al, %ah
+	xorb	%al, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	xchgb	%ah, %al
+	movb	%al, %bl	# Strange thing ... in the book this wasn't
+	andb	$0x02, %bl	# necessary but it worked on my card which
+	jz	setb2		# is a trident. Without it the screen goes
+				# blurred ...
+	andb	$0xfd, %al
+	jmp	clrb2		
+
+setb2:	orb	$0x02, %al	
+clrb2:	outb	%al, %dx
+	andb	$0x0f, %ah
+	cmpb	$0x02, %ah
+	je	istrid
+
+	xorw	%bp, %bp
+istrid:	ret
+
+trident_md:
+	.byte	0x50, 0x1e, 0x50
+	.byte	0x51, 0x2b, 0x50
+	.byte	0x52, 0x3c, 0x50
+	.byte	0x57, 0x19, 0x84
+	.byte	0x58, 0x1e, 0x84
+	.byte	0x59, 0x2b, 0x84
+	.byte	0x5a, 0x3c, 0x84
+	.byte	0
+	.ascii	"Trident"
+	.byte	0
+
+# Tseng.
+tseng_test:
+	movw	$0x3cd, %dx
+	inb	%dx, %al	# Could things be this simple ! :-)
+	movb	%al, %bl
+	movb	$0x55, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	movb	%al, %ah
+	movb	%bl, %al
+	outb	%al, %dx
+	cmpb	$0x55, %ah
+ 	je	istsen
+
+isnot:	xorw	%bp, %bp
+istsen:	ret
+
+tseng_md:
+	.byte	0x26, 0x3c, 0x50
+	.byte	0x2a, 0x28, 0x64
+	.byte	0x23, 0x19, 0x84
+	.byte	0x24, 0x1c, 0x84
+	.byte	0x22, 0x2c, 0x84
+	.byte	0x21, 0x3c, 0x84
+	.byte	0
+	.ascii	"Tseng"
+	.byte	0
+
+# Video7.
+video7_test:
+	movw	$0x3cc, %dx
+	inb	%dx, %al
+	movw	$0x3b4, %dx
+	andb	$0x01, %al
+	jz	even7
+
+	movw	$0x3d4, %dx
+even7:	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bl
+	movb	$0x55, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	decw	%dx
+	movb	$0x1f, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bh
+	decw	%dx
+	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	movb	%bl, %al
+	outb	%al, %dx
+	movb	$0x55, %al
+	xorb	$0xea, %al
+	cmpb	%bh, %al
+	jne	isnot
+	
+	movb	$VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
+	ret
+
+video7_md:
+	.byte	0x40, 0x2b, 0x50
+	.byte	0x43, 0x3c, 0x50
+	.byte	0x44, 0x3c, 0x64
+	.byte	0x41, 0x19, 0x84
+	.byte	0x42, 0x2c, 0x84
+	.byte	0x45, 0x1c, 0x84
+	.byte	0
+	.ascii	"Video 7"
+	.byte	0
+
+# Realtek VGA
+realtek_test:
+	leaw	idrtvga, %si
+	movw	$0x45, %di
+	movw	$0x0b, %cx
+	repe
+	cmpsb
+	je	isrt
+	
+	xorw	%bp, %bp
+isrt:	ret
+
+idrtvga:	.ascii	"REALTEK VGA"
+
+realtek_md:
+	.byte	0x1a, 0x3c, 0x50
+	.byte	0x1b, 0x19, 0x84
+	.byte	0x1c, 0x1e, 0x84
+	.byte	0x1d, 0x2b, 0x84
+	.byte	0x1e, 0x3c, 0x84
+	.byte	0
+	.ascii	"REALTEK"
+	.byte	0
+
+#endif	/* CONFIG_VIDEO_SVGA */
+
+# User-defined local mode table (VGA only)
+#ifdef CONFIG_VIDEO_LOCAL
+local_modes:
+	leaw	local_mode_table, %si
+locm1:	lodsw
+	orw	%ax, %ax
+	jz	locm2
+	
+	stosw
+	movsw
+	jmp	locm1
+
+locm2:	ret
+
+# This is the table of local video modes which can be supplied manually
+# by the user. Each entry consists of mode ID (word) and dimensions
+# (byte for column count and another byte for row count). These modes
+# are placed before all SVGA and VESA modes and override them if table
+# compacting is enabled. The table must end with a zero word followed
+# by NUL-terminated video adapter name.
+local_mode_table:
+	.word	0x0100				# Example: 40x25
+	.byte	25,40
+	.word	0
+	.ascii	"Local"
+	.byte	0
+#endif	/* CONFIG_VIDEO_LOCAL */
+
+# Read a key and return the ASCII code in al, scan code in ah
+getkey:	xorb	%ah, %ah
+	int	$0x16
+	ret
+
+# Read a key with a timeout of 30 seconds.
+# The hardware clock is used to get the time.
+getkt:	call	gettime
+	addb	$30, %al			# Wait 30 seconds
+	cmpb	$60, %al
+	jl	lminute
+
+	subb	$60, %al
+lminute:
+	movb	%al, %cl
+again:	movb	$0x01, %ah
+	int	$0x16
+	jnz	getkey				# key pressed, so get it
+
+	call	gettime
+	cmpb	%cl, %al
+	jne	again
+
+	movb	$0x20, %al			# timeout, return `space'
+	ret
+
+# Flush the keyboard buffer
+flush:	movb	$0x01, %ah
+	int	$0x16
+	jz	empty
+	
+	xorb	%ah, %ah
+	int	$0x16
+	jmp	flush
+
+empty:	ret
+
+# Print hexadecimal number.
+prthw:	pushw	%ax
+	movb	%ah, %al
+	call	prthb
+	popw	%ax
+prthb:	pushw	%ax
+	shrb	$4, %al
+	call	prthn
+	popw	%ax
+	andb	$0x0f, %al
+prthn:	cmpb	$0x0a, %al
+	jc	prth1
+
+	addb	$0x07, %al
+prth1:	addb	$0x30, %al
+	jmp	prtchr
+
+# Print decimal number in al
+prtdec:	pushw	%ax
+	pushw	%cx
+	xorb	%ah, %ah
+	movb	$0x0a, %cl
+	idivb	%cl
+	cmpb	$0x09, %al
+	jbe	lt100
+
+	call	prtdec
+	jmp	skip10
+
+lt100:	addb	$0x30, %al
+	call	prtchr
+skip10:	movb	%ah, %al
+	addb	$0x30, %al
+	call	prtchr	
+	popw	%cx
+	popw	%ax
+	ret
+
+store_edid:
+	pushw	%es				# just save all registers 
+	pushw	%ax				
+	pushw	%bx
+	pushw   %cx
+	pushw	%dx
+	pushw   %di
+
+	pushw	%fs                             
+	popw    %es
+
+	movl	$0x13131313, %eax		# memset block with 0x13
+	movw    $32, %cx
+	movw	$0x140, %di
+	cld
+	rep 
+	stosl  
+
+	movw	$0x4f15, %ax                    # do VBE/DDC 
+	movw	$0x01, %bx
+	movw	$0x00, %cx
+	movw    $0x01, %dx
+	movw	$0x140, %di
+	int	$0x10	
+
+	popw	%di				# restore all registers        
+	popw	%dx
+	popw	%cx
+	popw	%bx
+	popw	%ax
+	popw	%es	
+	ret
+
+# VIDEO_SELECT-only variables
+mt_end:		.word	0	# End of video mode table if built
+edit_buf:	.space	6	# Line editor buffer
+card_name:	.word	0	# Pointer to adapter name
+scanning:	.byte	0	# Performing mode scan
+do_restore:	.byte	0	# Screen contents altered during mode change
+svga_prefix:	.byte	VIDEO_FIRST_BIOS>>8	# Default prefix for BIOS modes
+graphic_mode:	.byte	0	# Graphic mode with a linear frame buffer
+dac_size:	.byte	6	# DAC bit depth
+
+# Status messages
+keymsg:		.ascii	"Press <RETURN> to see video modes available, "
+		.ascii	"<SPACE> to continue or wait 30 secs"
+		.byte	0x0d, 0x0a, 0
+
+listhdr:	.byte	0x0d, 0x0a
+		.ascii	"Mode:    COLSxROWS:"
+
+crlft:		.byte	0x0d, 0x0a, 0
+
+prompt:		.byte	0x0d, 0x0a
+		.asciz	"Enter mode number or `scan': "
+
+unknt:		.asciz	"Unknown mode ID. Try again."
+
+badmdt:		.ascii	"You passed an undefined mode number."
+		.byte	0x0d, 0x0a, 0
+
+vesaer:		.ascii	"Error: Scanning of VESA modes failed. Please "
+		.ascii	"report to <mj@ucw.cz>."
+		.byte	0x0d, 0x0a, 0
+
+old_name:	.asciz	"CGA/MDA/HGA"
+
+ega_name:	.asciz	"EGA"
+
+svga_name:	.ascii	" "
+
+vga_name:	.asciz	"VGA"
+
+vesa_name:	.asciz	"VESA"
+
+name_bann:	.asciz	"Video adapter: "
+#endif /* CONFIG_VIDEO_SELECT */
+
+# Other variables:
+adapter:	.byte	0	# Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
+video_segment:	.word	0xb800	# Video memory segment
+force_size:	.word	0	# Use this size instead of the one in BIOS vars
diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
new file mode 100644
index 00000000000..103c353d0a6
--- /dev/null
+++ b/arch/i386/crypto/Makefile
@@ -0,0 +1,9 @@
+# 
+# i386/crypto/Makefile 
+# 
+# Arch-specific CryptoAPI modules.
+# 
+
+obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+
+aes-i586-y := aes-i586-asm.o aes.o
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
new file mode 100644
index 00000000000..7b73c67cb4e
--- /dev/null
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -0,0 +1,376 @@
+// -------------------------------------------------------------------------
+// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
+// All rights reserved.
+//
+// LICENSE TERMS
+//
+// The free distribution and use of this software in both source and binary 
+// form is allowed (with or without changes) provided that:
+//
+//   1. distributions of this source code include the above copyright 
+//      notice, this list of conditions and the following disclaimer//
+//
+//   2. distributions in binary form include the above copyright
+//      notice, this list of conditions and the following disclaimer
+//      in the documentation and/or other associated materials//
+//
+//   3. the copyright holder's name is not used to endorse products 
+//      built using this software without specific written permission.
+//
+//
+// ALTERNATIVELY, provided that this notice is retained in full, this product
+// may be distributed under the terms of the GNU General Public License (GPL),
+// in which case the provisions of the GPL apply INSTEAD OF those given above.
+//
+// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
+// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+
+// DISCLAIMER
+//
+// This software is provided 'as is' with no explicit or implied warranties
+// in respect of its properties including, but not limited to, correctness 
+// and fitness for purpose.
+// -------------------------------------------------------------------------
+// Issue Date: 29/07/2002
+
+.file "aes-i586-asm.S"
+.text
+
+// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
+// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
+	
+#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
+
+// offsets to parameters with one register pushed onto stack
+
+#define in_blk    8  // input byte array address parameter
+#define out_blk  12  // output byte array address parameter
+#define ctx      16  // AES context structure
+
+// offsets in context structure
+
+#define ekey     0   // encryption key schedule base address
+#define nrnd   256   // number of rounds
+#define dkey   260   // decryption key schedule base address
+
+// register mapping for encrypt and decrypt subroutines
+
+#define r0  eax
+#define r1  ebx
+#define r2  ecx
+#define r3  edx
+#define r4  esi
+#define r5  edi
+
+#define eaxl  al
+#define eaxh  ah
+#define ebxl  bl
+#define ebxh  bh
+#define ecxl  cl
+#define ecxh  ch
+#define edxl  dl
+#define edxh  dh
+
+#define _h(reg) reg##h
+#define h(reg) _h(reg)
+
+#define _l(reg) reg##l
+#define l(reg) _l(reg)
+
+// This macro takes a 32-bit word representing a column and uses
+// each of its four bytes to index into four tables of 256 32-bit
+// words to obtain values that are then xored into the appropriate
+// output registers r0, r1, r4 or r5.  
+
+// Parameters:
+// table table base address
+//   %1  out_state[0]
+//   %2  out_state[1]
+//   %3  out_state[2]
+//   %4  out_state[3]
+//   idx input register for the round (destroyed)
+//   tmp scratch register for the round
+// sched key schedule
+
+#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
+	movzx   %l(idx),%tmp;			\
+	xor     table(,%tmp,4),%a1;		\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+2*tlen(,%tmp,4),%a3;	\
+	xor     table+3*tlen(,%idx,4),%a4;
+
+// initialise output registers from the key schedule
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
+	mov     0 sched,%a1;			\
+	movzx   %l(idx),%tmp;			\
+	mov     12 sched,%a2;			\
+	xor     table(,%tmp,4),%a1;		\
+	mov     4 sched,%a4;			\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+3*tlen(,%idx,4),%a4;	\
+	mov     %a3,%idx;			\
+	mov     8 sched,%a3;			\
+	xor     table+2*tlen(,%tmp,4),%a3;
+
+// initialise output registers from the key schedule
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
+	mov     0 sched,%a1;			\
+	movzx   %l(idx),%tmp;			\
+	mov     4 sched,%a2;			\
+	xor     table(,%tmp,4),%a1;		\
+	mov     12 sched,%a4;			\
+	movzx   %h(idx),%tmp;			\
+	shr     $16,%idx;			\
+	xor     table+tlen(,%tmp,4),%a2;	\
+	movzx   %l(idx),%tmp;			\
+	movzx   %h(idx),%idx;			\
+	xor     table+3*tlen(,%idx,4),%a4;	\
+	mov     %a3,%idx;			\
+	mov     8 sched,%a3;			\
+	xor     table+2*tlen(,%tmp,4),%a3;
+
+
+// original Gladman had conditional saves to MMX regs.
+#define save(a1, a2)		\
+	mov     %a2,4*a1(%esp)
+
+#define restore(a1, a2)		\
+	mov     4*a2(%esp),%a1
+
+// These macros perform a forward encryption cycle. They are entered with
+// the first previous round column values in r0,r1,r4,r5 and
+// exit with the final values in the same registers, using stack
+// for temporary storage.
+
+// round column values
+// on entry: r0,r1,r4,r5
+// on exit:  r2,r1,r4,r5
+#define fwd_rnd1(arg, table)						\
+	save   (0,r1);							\
+	save   (1,r5);							\
+									\
+	/* compute new column values */					\
+	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
+	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
+	restore(r0,0);							\
+	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
+	restore(r0,1);							\
+	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */
+
+// round column values
+// on entry: r2,r1,r4,r5
+// on exit:  r0,r1,r4,r5
+#define fwd_rnd2(arg, table)						\
+	save   (0,r1);							\
+	save   (1,r5);							\
+									\
+	/* compute new column values */					\
+	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
+	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
+	restore(r2,0);							\
+	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
+	restore(r2,1);							\
+	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */
+
+// These macros performs an inverse encryption cycle. They are entered with
+// the first previous round column values in r0,r1,r4,r5 and
+// exit with the final values in the same registers, using stack
+// for temporary storage
+
+// round column values
+// on entry: r0,r1,r4,r5
+// on exit:  r2,r1,r4,r5
+#define inv_rnd1(arg, table)						\
+	save    (0,r1);							\
+	save    (1,r5);							\
+									\
+	/* compute new column values */					\
+	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
+	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
+	restore(r0,0);							\
+	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
+	restore(r0,1);							\
+	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */
+
+// round column values
+// on entry: r2,r1,r4,r5
+// on exit:  r0,r1,r4,r5
+#define inv_rnd2(arg, table)						\
+	save    (0,r1);							\
+	save    (1,r5);							\
+									\
+	/* compute new column values */					\
+	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */	\
+	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */	\
+	restore(r2,0);							\
+	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */	\
+	restore(r2,1);							\
+	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */
+
+// AES (Rijndael) Encryption Subroutine
+
+.global  aes_enc_blk
+
+.extern  ft_tab
+.extern  fl_tab
+
+.align 4
+
+aes_enc_blk:
+	push    %ebp
+	mov     ctx(%esp),%ebp      // pointer to context
+
+// CAUTION: the order and the values used in these assigns 
+// rely on the register mappings
+
+1:	push    %ebx
+	mov     in_blk+4(%esp),%r2
+	push    %esi
+	mov     nrnd(%ebp),%r3   // number of rounds
+	push    %edi
+#if ekey != 0
+	lea     ekey(%ebp),%ebp  // key pointer
+#endif
+
+// input four columns and xor in first round key
+
+	mov     (%r2),%r0
+	mov     4(%r2),%r1
+	mov     8(%r2),%r4
+	mov     12(%r2),%r5
+	xor     (%ebp),%r0
+	xor     4(%ebp),%r1
+	xor     8(%ebp),%r4
+	xor     12(%ebp),%r5
+
+	sub     $8,%esp           // space for register saves on stack
+	add     $16,%ebp          // increment to next round key
+	sub     $10,%r3          
+	je      4f              // 10 rounds for 128-bit key
+	add     $32,%ebp
+	sub     $2,%r3
+	je      3f              // 12 rounds for 128-bit key
+	add     $32,%ebp
+
+2:	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 128-bit key
+	fwd_rnd2( -48(%ebp) ,ft_tab)
+3:	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 128-bit key
+	fwd_rnd2( -16(%ebp) ,ft_tab)
+4:	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
+	fwd_rnd2( +16(%ebp) ,ft_tab)
+	fwd_rnd1( +32(%ebp) ,ft_tab)
+	fwd_rnd2( +48(%ebp) ,ft_tab)
+	fwd_rnd1( +64(%ebp) ,ft_tab)
+	fwd_rnd2( +80(%ebp) ,ft_tab)
+	fwd_rnd1( +96(%ebp) ,ft_tab)
+	fwd_rnd2(+112(%ebp) ,ft_tab)
+	fwd_rnd1(+128(%ebp) ,ft_tab)
+	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table
+
+// move final values to the output array.  CAUTION: the 
+// order of these assigns rely on the register mappings
+
+	add     $8,%esp
+	mov     out_blk+12(%esp),%ebp
+	mov     %r5,12(%ebp)
+	pop     %edi
+	mov     %r4,8(%ebp)
+	pop     %esi
+	mov     %r1,4(%ebp)
+	pop     %ebx
+	mov     %r0,(%ebp)
+	pop     %ebp
+	mov     $1,%eax
+	ret
+
+// AES (Rijndael) Decryption Subroutine
+
+.global  aes_dec_blk
+
+.extern  it_tab
+.extern  il_tab
+
+.align 4
+
+aes_dec_blk:
+	push    %ebp
+	mov     ctx(%esp),%ebp       // pointer to context
+
+// CAUTION: the order and the values used in these assigns 
+// rely on the register mappings
+
+1:	push    %ebx
+	mov     in_blk+4(%esp),%r2
+	push    %esi
+	mov     nrnd(%ebp),%r3   // number of rounds
+	push    %edi
+#if dkey != 0
+	lea     dkey(%ebp),%ebp  // key pointer
+#endif
+	mov     %r3,%r0
+	shl     $4,%r0
+	add     %r0,%ebp
+	
+// input four columns and xor in first round key
+
+	mov     (%r2),%r0
+	mov     4(%r2),%r1
+	mov     8(%r2),%r4
+	mov     12(%r2),%r5
+	xor     (%ebp),%r0
+	xor     4(%ebp),%r1
+	xor     8(%ebp),%r4
+	xor     12(%ebp),%r5
+
+	sub     $8,%esp         // space for register saves on stack
+	sub     $16,%ebp        // increment to next round key
+	sub     $10,%r3          
+	je      4f              // 10 rounds for 128-bit key
+	sub     $32,%ebp
+	sub     $2,%r3
+	je      3f              // 12 rounds for 128-bit key
+	sub     $32,%ebp
+
+2:	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 128-bit key
+	inv_rnd2( +48(%ebp), it_tab)
+3:	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 128-bit key
+	inv_rnd2( +16(%ebp), it_tab)
+4:	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
+	inv_rnd2( -16(%ebp), it_tab)
+	inv_rnd1( -32(%ebp), it_tab)
+	inv_rnd2( -48(%ebp), it_tab)
+	inv_rnd1( -64(%ebp), it_tab)
+	inv_rnd2( -80(%ebp), it_tab)
+	inv_rnd1( -96(%ebp), it_tab)
+	inv_rnd2(-112(%ebp), it_tab)
+	inv_rnd1(-128(%ebp), it_tab)
+	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table
+
+// move final values to the output array.  CAUTION: the 
+// order of these assigns rely on the register mappings
+
+	add     $8,%esp
+	mov     out_blk+12(%esp),%ebp
+	mov     %r5,12(%ebp)
+	pop     %edi
+	mov     %r4,8(%ebp)
+	pop     %esi
+	mov     %r1,4(%ebp)
+	pop     %ebx
+	mov     %r0,(%ebp)
+	pop     %ebp
+	mov     $1,%eax
+	ret
+
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
new file mode 100644
index 00000000000..1019430fc1f
--- /dev/null
+++ b/arch/i386/crypto/aes.c
@@ -0,0 +1,520 @@
+/* 
+ * 
+ * Glue Code for optimized 586 assembler version of AES
+ *
+ * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
+ * All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software in both source and binary
+ * form is allowed (with or without changes) provided that:
+ *
+ *   1. distributions of this source code include the above copyright
+ *      notice, this list of conditions and the following disclaimer;
+ *
+ *   2. distributions in binary form include the above copyright
+ *      notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other associated materials;
+ *
+ *   3. the copyright holder's name is not used to endorse products
+ *      built using this software without specific written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this product
+ * may be distributed under the terms of the GNU General Public License (GPL),
+ * in which case the provisions of the GPL apply INSTEAD OF those given above.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ *
+ * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
+ * 2.5 API).
+ * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
+ * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/linkage.h>
+
+asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
+asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
+
+#define AES_MIN_KEY_SIZE	16
+#define AES_MAX_KEY_SIZE	32
+#define AES_BLOCK_SIZE		16
+#define AES_KS_LENGTH		4 * AES_BLOCK_SIZE
+#define RC_LENGTH		29
+
+struct aes_ctx {
+	u32 ekey[AES_KS_LENGTH];
+	u32 rounds;
+	u32 dkey[AES_KS_LENGTH];
+};
+
+#define WPOLY 0x011b
+#define u32_in(x) le32_to_cpu(*(const u32 *)(x))
+#define bytes2word(b0, b1, b2, b3)  \
+	(((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
+
+/* define the finite field multiplies required for Rijndael */
+#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
+#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
+#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
+#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
+#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
+#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
+#define fi(x) ((x) ?   pow[255 - log[x]]: 0)
+
+static inline u32 upr(u32 x, int n)
+{
+	return (x << 8 * n) | (x >> (32 - 8 * n));
+}
+
+static inline u8 bval(u32 x, int n)
+{
+	return x >> 8 * n;
+}
+
+/* The forward and inverse affine transformations used in the S-box */
+#define fwd_affine(x) \
+	(w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
+
+#define inv_affine(x) \
+	(w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
+
+static u32 rcon_tab[RC_LENGTH];
+
+u32 ft_tab[4][256];
+u32 fl_tab[4][256];
+static u32 ls_tab[4][256];
+static u32 im_tab[4][256];
+u32 il_tab[4][256];
+u32 it_tab[4][256];
+
+static void gen_tabs(void)
+{
+	u32 i, w;
+	u8 pow[512], log[256];
+
+	/*
+	 * log and power tables for GF(2^8) finite field with
+	 * WPOLY as modular polynomial - the simplest primitive
+	 * root is 0x03, used here to generate the tables.
+	 */
+	i = 0; w = 1; 
+	
+	do {
+		pow[i] = (u8)w;
+		pow[i + 255] = (u8)w;
+		log[w] = (u8)i++;
+		w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
+	} while (w != 1);
+	
+	for(i = 0, w = 1; i < RC_LENGTH; ++i) {
+		rcon_tab[i] = bytes2word(w, 0, 0, 0);
+		w = f2(w);
+	}
+
+	for(i = 0; i < 256; ++i) {
+		u8 b;
+		
+		b = fwd_affine(fi((u8)i));
+		w = bytes2word(f2(b), b, b, f3(b));
+
+		/* tables for a normal encryption round */
+		ft_tab[0][i] = w;
+		ft_tab[1][i] = upr(w, 1);
+		ft_tab[2][i] = upr(w, 2);
+		ft_tab[3][i] = upr(w, 3);
+		w = bytes2word(b, 0, 0, 0);
+		
+		/*
+		 * tables for last encryption round
+		 * (may also be used in the key schedule)
+		 */
+		fl_tab[0][i] = w;
+		fl_tab[1][i] = upr(w, 1);
+		fl_tab[2][i] = upr(w, 2);
+		fl_tab[3][i] = upr(w, 3);
+		
+		/*
+		 * table for key schedule if fl_tab above is
+		 * not of the required form
+		 */
+		ls_tab[0][i] = w;
+		ls_tab[1][i] = upr(w, 1);
+		ls_tab[2][i] = upr(w, 2);
+		ls_tab[3][i] = upr(w, 3);
+		
+		b = fi(inv_affine((u8)i));
+		w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+
+		/* tables for the inverse mix column operation  */
+		im_tab[0][b] = w;
+		im_tab[1][b] = upr(w, 1);
+		im_tab[2][b] = upr(w, 2);
+		im_tab[3][b] = upr(w, 3);
+
+		/* tables for a normal decryption round */
+		it_tab[0][i] = w;
+		it_tab[1][i] = upr(w,1);
+		it_tab[2][i] = upr(w,2);
+		it_tab[3][i] = upr(w,3);
+
+		w = bytes2word(b, 0, 0, 0);
+		
+		/* tables for last decryption round */
+		il_tab[0][i] = w;
+		il_tab[1][i] = upr(w,1);
+		il_tab[2][i] = upr(w,2);
+		il_tab[3][i] = upr(w,3);
+    }
+}
+
+#define four_tables(x,tab,vf,rf,c)		\
+(	tab[0][bval(vf(x,0,c),rf(0,c))]	^	\
+	tab[1][bval(vf(x,1,c),rf(1,c))] ^	\
+	tab[2][bval(vf(x,2,c),rf(2,c))] ^	\
+	tab[3][bval(vf(x,3,c),rf(3,c))]		\
+)
+
+#define vf1(x,r,c)  (x)
+#define rf1(r,c)    (r)
+#define rf2(r,c)    ((r-c)&3)
+
+#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
+#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
+
+#define ff(x) inv_mcol(x)
+
+#define ke4(k,i)							\
+{									\
+	k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];		\
+	k[4*(i)+5] = ss[1] ^= ss[0];					\
+	k[4*(i)+6] = ss[2] ^= ss[1];					\
+	k[4*(i)+7] = ss[3] ^= ss[2];					\
+}
+
+#define kel4(k,i)							\
+{									\
+	k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];		\
+	k[4*(i)+5] = ss[1] ^= ss[0];					\
+	k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];	\
+}
+
+#define ke6(k,i)							\
+{									\
+	k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];		\
+	k[6*(i)+ 7] = ss[1] ^= ss[0];					\
+	k[6*(i)+ 8] = ss[2] ^= ss[1];					\
+	k[6*(i)+ 9] = ss[3] ^= ss[2];					\
+	k[6*(i)+10] = ss[4] ^= ss[3];					\
+	k[6*(i)+11] = ss[5] ^= ss[4];					\
+}
+
+#define kel6(k,i)							\
+{									\
+	k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];		\
+	k[6*(i)+ 7] = ss[1] ^= ss[0];					\
+	k[6*(i)+ 8] = ss[2] ^= ss[1];					\
+	k[6*(i)+ 9] = ss[3] ^= ss[2];					\
+}
+
+#define ke8(k,i)							\
+{									\
+	k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];		\
+	k[8*(i)+ 9] = ss[1] ^= ss[0];					\
+	k[8*(i)+10] = ss[2] ^= ss[1];					\
+	k[8*(i)+11] = ss[3] ^= ss[2];					\
+	k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0);				\
+	k[8*(i)+13] = ss[5] ^= ss[4];					\
+	k[8*(i)+14] = ss[6] ^= ss[5];					\
+	k[8*(i)+15] = ss[7] ^= ss[6];					\
+}
+
+#define kel8(k,i)							\
+{									\
+	k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];		\
+	k[8*(i)+ 9] = ss[1] ^= ss[0];					\
+	k[8*(i)+10] = ss[2] ^= ss[1];					\
+	k[8*(i)+11] = ss[3] ^= ss[2];					\
+}
+
+#define kdf4(k,i)							\
+{									\
+	ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];				\
+	ss[1] = ss[1] ^ ss[3];						\
+	ss[2] = ss[2] ^ ss[3];						\
+	ss[3] = ss[3];							\
+	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
+	ss[i % 4] ^= ss[4];						\
+	ss[4] ^= k[4*(i)];						\
+	k[4*(i)+4] = ff(ss[4]);						\
+	ss[4] ^= k[4*(i)+1];						\
+	k[4*(i)+5] = ff(ss[4]);						\
+	ss[4] ^= k[4*(i)+2];						\
+	k[4*(i)+6] = ff(ss[4]);						\
+	ss[4] ^= k[4*(i)+3];						\
+	k[4*(i)+7] = ff(ss[4]);						\
+}
+
+#define kd4(k,i)							\
+{									\
+	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
+	ss[i % 4] ^= ss[4];						\
+	ss[4] = ff(ss[4]);						\
+	k[4*(i)+4] = ss[4] ^= k[4*(i)];					\
+	k[4*(i)+5] = ss[4] ^= k[4*(i)+1];				\
+	k[4*(i)+6] = ss[4] ^= k[4*(i)+2];				\
+	k[4*(i)+7] = ss[4] ^= k[4*(i)+3];				\
+}
+
+#define kdl4(k,i)							\
+{									\
+	ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];			\
+	ss[i % 4] ^= ss[4];						\
+	k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];			\
+	k[4*(i)+5] = ss[1] ^ ss[3];					\
+	k[4*(i)+6] = ss[0];						\
+	k[4*(i)+7] = ss[1];						\
+}
+
+#define kdf6(k,i)							\
+{									\
+	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];				\
+	k[6*(i)+ 6] = ff(ss[0]);					\
+	ss[1] ^= ss[0];							\
+	k[6*(i)+ 7] = ff(ss[1]);					\
+	ss[2] ^= ss[1];							\
+	k[6*(i)+ 8] = ff(ss[2]);					\
+	ss[3] ^= ss[2];							\
+	k[6*(i)+ 9] = ff(ss[3]);					\
+	ss[4] ^= ss[3];							\
+	k[6*(i)+10] = ff(ss[4]);					\
+	ss[5] ^= ss[4];							\
+	k[6*(i)+11] = ff(ss[5]);					\
+}
+
+#define kd6(k,i)							\
+{									\
+	ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];				\
+	ss[0] ^= ss[6]; ss[6] = ff(ss[6]);				\
+	k[6*(i)+ 6] = ss[6] ^= k[6*(i)];				\
+	ss[1] ^= ss[0];							\
+	k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];				\
+	ss[2] ^= ss[1];							\
+	k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];				\
+	ss[3] ^= ss[2];							\
+	k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];				\
+	ss[4] ^= ss[3];							\
+	k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];				\
+	ss[5] ^= ss[4];							\
+	k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];				\
+}
+
+#define kdl6(k,i)							\
+{									\
+	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];				\
+	k[6*(i)+ 6] = ss[0];						\
+	ss[1] ^= ss[0];							\
+	k[6*(i)+ 7] = ss[1];						\
+	ss[2] ^= ss[1];							\
+	k[6*(i)+ 8] = ss[2];						\
+	ss[3] ^= ss[2];							\
+	k[6*(i)+ 9] = ss[3];						\
+}
+
+#define kdf8(k,i)							\
+{									\
+	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];				\
+	k[8*(i)+ 8] = ff(ss[0]);					\
+	ss[1] ^= ss[0];							\
+	k[8*(i)+ 9] = ff(ss[1]);					\
+	ss[2] ^= ss[1];							\
+	k[8*(i)+10] = ff(ss[2]);					\
+	ss[3] ^= ss[2];							\
+	k[8*(i)+11] = ff(ss[3]);					\
+	ss[4] ^= ls_box(ss[3],0);					\
+	k[8*(i)+12] = ff(ss[4]);					\
+	ss[5] ^= ss[4];							\
+	k[8*(i)+13] = ff(ss[5]);					\
+	ss[6] ^= ss[5];							\
+	k[8*(i)+14] = ff(ss[6]);					\
+	ss[7] ^= ss[6];							\
+	k[8*(i)+15] = ff(ss[7]);					\
+}
+
+#define kd8(k,i)							\
+{									\
+	u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];			\
+	ss[0] ^= __g;							\
+	__g = ff(__g);							\
+	k[8*(i)+ 8] = __g ^= k[8*(i)];					\
+	ss[1] ^= ss[0];							\
+	k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];				\
+	ss[2] ^= ss[1];							\
+	k[8*(i)+10] = __g ^= k[8*(i)+ 2];				\
+	ss[3] ^= ss[2];							\
+	k[8*(i)+11] = __g ^= k[8*(i)+ 3];				\
+	__g = ls_box(ss[3],0);						\
+	ss[4] ^= __g;							\
+	__g = ff(__g);							\
+	k[8*(i)+12] = __g ^= k[8*(i)+ 4];				\
+	ss[5] ^= ss[4];							\
+	k[8*(i)+13] = __g ^= k[8*(i)+ 5];				\
+	ss[6] ^= ss[5];							\
+	k[8*(i)+14] = __g ^= k[8*(i)+ 6];				\
+	ss[7] ^= ss[6];							\
+	k[8*(i)+15] = __g ^= k[8*(i)+ 7];				\
+}
+
+#define kdl8(k,i)							\
+{									\
+	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];				\
+	k[8*(i)+ 8] = ss[0];						\
+	ss[1] ^= ss[0];							\
+	k[8*(i)+ 9] = ss[1];						\
+	ss[2] ^= ss[1];							\
+	k[8*(i)+10] = ss[2];						\
+	ss[3] ^= ss[2];							\
+	k[8*(i)+11] = ss[3];						\
+}
+
+static int
+aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+{
+	int i;
+	u32 ss[8];
+	struct aes_ctx *ctx = ctx_arg;
+
+	/* encryption schedule */
+	
+	ctx->ekey[0] = ss[0] = u32_in(in_key);
+	ctx->ekey[1] = ss[1] = u32_in(in_key + 4);
+	ctx->ekey[2] = ss[2] = u32_in(in_key + 8);
+	ctx->ekey[3] = ss[3] = u32_in(in_key + 12);
+
+	switch(key_len) {
+	case 16:
+		for (i = 0; i < 9; i++)
+			ke4(ctx->ekey, i);
+		kel4(ctx->ekey, 9);
+		ctx->rounds = 10;
+		break;
+		
+	case 24:
+		ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
+		ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
+		for (i = 0; i < 7; i++)
+			ke6(ctx->ekey, i);
+		kel6(ctx->ekey, 7); 
+		ctx->rounds = 12;
+		break;
+
+	case 32:
+		ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
+		ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
+		ctx->ekey[6] = ss[6] = u32_in(in_key + 24);
+		ctx->ekey[7] = ss[7] = u32_in(in_key + 28);
+		for (i = 0; i < 6; i++)
+			ke8(ctx->ekey, i);
+		kel8(ctx->ekey, 6);
+		ctx->rounds = 14;
+		break;
+
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	
+	/* decryption schedule */
+	
+	ctx->dkey[0] = ss[0] = u32_in(in_key);
+	ctx->dkey[1] = ss[1] = u32_in(in_key + 4);
+	ctx->dkey[2] = ss[2] = u32_in(in_key + 8);
+	ctx->dkey[3] = ss[3] = u32_in(in_key + 12);
+
+	switch (key_len) {
+	case 16:
+		kdf4(ctx->dkey, 0);
+		for (i = 1; i < 9; i++)
+			kd4(ctx->dkey, i);
+		kdl4(ctx->dkey, 9);
+		break;
+		
+	case 24:
+		ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
+		ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
+		kdf6(ctx->dkey, 0);
+		for (i = 1; i < 7; i++)
+			kd6(ctx->dkey, i);
+		kdl6(ctx->dkey, 7);
+		break;
+
+	case 32:
+		ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
+		ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
+		ctx->dkey[6] = ff(ss[6] = u32_in(in_key + 24));
+		ctx->dkey[7] = ff(ss[7] = u32_in(in_key + 28));
+		kdf8(ctx->dkey, 0);
+		for (i = 1; i < 6; i++)
+			kd8(ctx->dkey, i);
+		kdl8(ctx->dkey, 6);
+		break;
+	}
+	return 0;
+}
+
+static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(src, dst, ctx);
+}
+static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(src, dst, ctx);
+}
+
+
+static struct crypto_alg aes_alg = {
+	.cra_name		=	"aes",
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
+			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
+			.cia_setkey	   	= 	aes_set_key,
+			.cia_encrypt	 	=	aes_encrypt,
+			.cia_decrypt	  	=	aes_decrypt
+		}
+	}
+};
+
+static int __init aes_init(void)
+{
+	gen_tabs();
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
+MODULE_ALIAS("aes");
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
new file mode 100644
index 00000000000..28e62038379
--- /dev/null
+++ b/arch/i386/defconfig
@@ -0,0 +1,1247 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_GENERIC_ISA_DMA=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_STANDALONE=y
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_HOTPLUG=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+# CONFIG_MODULE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+CONFIG_X86_PC=y
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+# CONFIG_M686 is not set
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+CONFIG_MPENTIUM4=y
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_HPET_EMULATE_RTC is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_SCHED_SMT=y
+CONFIG_PREEMPT=y
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_TSC=y
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_NONFATAL=y
+CONFIG_X86_MCE_P4THERMAL=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_MICROCODE is not set
+# CONFIG_X86_MSR is not set
+# CONFIG_X86_CPUID is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+# CONFIG_EFI is not set
+CONFIG_IRQBALANCE=y
+CONFIG_HAVE_DEC_LOCK=y
+# CONFIG_REGPARM is not set
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+CONFIG_SOFTWARE_SUSPEND=y
+# CONFIG_PM_DISK is not set
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
+CONFIG_ACPI_FAN=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_THERMAL=y
+# CONFIG_ACPI_ASUS is not set
+# CONFIG_ACPI_TOSHIBA is not set
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+# CONFIG_X86_PM_TIMER is not set
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+# CONFIG_APM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCI_USE_VECTOR is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCMCIA/CardBus support
+#
+# CONFIG_PCMCIA is not set
+CONFIG_PCMCIA_PROBE=y
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_FW_LOADER=m
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=y
+CONFIG_PARPORT_PC=y
+CONFIG_PARPORT_PC_CML1=y
+# CONFIG_PARPORT_SERIAL is not set
+# CONFIG_PARPORT_PC_FIFO is not set
+# CONFIG_PARPORT_PC_SUPERIO is not set
+# CONFIG_PARPORT_OTHER is not set
+# CONFIG_PARPORT_1284 is not set
+
+#
+# Plug and Play support
+#
+CONFIG_PNP=y
+# CONFIG_PNP_DEBUG is not set
+
+#
+# Protocols
+#
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_CARMEL is not set
+# CONFIG_BLK_DEV_RAM is not set
+CONFIG_LBD=y
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+CONFIG_IDE_TASKFILE_IO=y
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+# CONFIG_BLK_DEV_IDEPNP is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_ADMA=y
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_ATIIXP is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SIS5513 is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+CONFIG_SCSI_DPT_I2O=m
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_MEGARAID is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_PROMISE is not set
+CONFIG_SCSI_SATA_SX4=m
+# CONFIG_SCSI_SATA_SIL is not set
+CONFIG_SCSI_SATA_SIS=m
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA6322 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+CONFIG_IEEE1394=y
+
+#
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
+
+#
+# Device Drivers
+#
+
+#
+# Texas Instruments PCILynx requires I2C
+#
+CONFIG_IEEE1394_OHCI1394=y
+
+#
+# Protocol Drivers
+#
+# CONFIG_IEEE1394_VIDEO1394 is not set
+# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394 is not set
+# CONFIG_IEEE1394_DV1394 is not set
+CONFIG_IEEE1394_RAWIO=y
+# CONFIG_IEEE1394_CMP is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=y
+# CONFIG_IP_NF_FTP is not set
+# CONFIG_IP_NF_IRC is not set
+# CONFIG_IP_NF_TFTP is not set
+# CONFIG_IP_NF_AMANDA is not set
+CONFIG_IP_NF_QUEUE=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_LIMIT=y
+CONFIG_IP_NF_MATCH_IPRANGE=y
+CONFIG_IP_NF_MATCH_MAC=y
+CONFIG_IP_NF_MATCH_PKTTYPE=y
+CONFIG_IP_NF_MATCH_MARK=y
+CONFIG_IP_NF_MATCH_MULTIPORT=y
+CONFIG_IP_NF_MATCH_TOS=y
+CONFIG_IP_NF_MATCH_RECENT=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_DSCP=y
+CONFIG_IP_NF_MATCH_AH_ESP=y
+CONFIG_IP_NF_MATCH_LENGTH=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_MATCH_TCPMSS=y
+CONFIG_IP_NF_MATCH_HELPER=y
+CONFIG_IP_NF_MATCH_STATE=y
+CONFIG_IP_NF_MATCH_CONNTRACK=y
+CONFIG_IP_NF_MATCH_OWNER=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_SAME=y
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_TOS=y
+CONFIG_IP_NF_TARGET_ECN=y
+CONFIG_IP_NF_TARGET_DSCP=y
+CONFIG_IP_NF_TARGET_MARK=y
+CONFIG_IP_NF_TARGET_CLASSIFY=y
+CONFIG_IP_NF_TARGET_LOG=y
+CONFIG_IP_NF_TARGET_ULOG=y
+CONFIG_IP_NF_TARGET_TCPMSS=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP_NF_TARGET_NOTRACK=m
+CONFIG_IP_NF_RAW=m
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_NET_SB1000 is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+CONFIG_8139TOO=y
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_TIGON3 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+CONFIG_S2IO=m
+# CONFIG_S2IO_NAPI is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_CONSOLE is not set
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=y
+# CONFIG_LP_CONSOLE is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+# CONFIG_QIC02_TAPE is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=y
+# CONFIG_AGP_ALI is not set
+# CONFIG_AGP_ATI is not set
+# CONFIG_AGP_AMD is not set
+# CONFIG_AGP_AMD64 is not set
+CONFIG_AGP_INTEL=y
+# CONFIG_AGP_NVIDIA is not set
+# CONFIG_AGP_SIS is not set
+# CONFIG_AGP_SWORKS is not set
+# CONFIG_AGP_VIA is not set
+# CONFIG_AGP_EFFICEON is not set
+CONFIG_DRM=y
+# CONFIG_DRM_TDFX is not set
+# CONFIG_DRM_GAMMA is not set
+# CONFIG_DRM_R128 is not set
+# CONFIG_DRM_RADEON is not set
+# CONFIG_DRM_I810 is not set
+CONFIG_DRM_I830=y
+# CONFIG_DRM_MGA is not set
+# CONFIG_DRM_SIS is not set
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+# CONFIG_VIDEO_SELECT is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+CONFIG_SOUND=y
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=y
+CONFIG_SND_TIMER=y
+CONFIG_SND_PCM=y
+CONFIG_SND_RAWMIDI=y
+CONFIG_SND_SEQUENCER=y
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# ISA devices
+#
+# CONFIG_SND_AD1848 is not set
+# CONFIG_SND_CS4231 is not set
+# CONFIG_SND_CS4232 is not set
+# CONFIG_SND_CS4236 is not set
+# CONFIG_SND_ES1688 is not set
+# CONFIG_SND_ES18XX is not set
+# CONFIG_SND_GUSCLASSIC is not set
+# CONFIG_SND_GUSEXTREME is not set
+# CONFIG_SND_GUSMAX is not set
+# CONFIG_SND_INTERWAVE is not set
+# CONFIG_SND_INTERWAVE_STB is not set
+# CONFIG_SND_OPTI92X_AD1848 is not set
+# CONFIG_SND_OPTI92X_CS4231 is not set
+# CONFIG_SND_OPTI93X is not set
+# CONFIG_SND_SB8 is not set
+# CONFIG_SND_SB16 is not set
+# CONFIG_SND_SBAWE is not set
+# CONFIG_SND_WAVEFRONT is not set
+# CONFIG_SND_CMI8330 is not set
+# CONFIG_SND_OPL3SA2 is not set
+# CONFIG_SND_SGALAXY is not set
+# CONFIG_SND_SSCAPE is not set
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=y
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CS4281 is not set
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+# CONFIG_SND_FM801 is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+CONFIG_SND_INTEL8X0=y
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VX222 is not set
+
+#
+# ALSA USB devices
+#
+# CONFIG_SND_USB_AUDIO is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+
+#
+# USB support
+#
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_OHCI_HCD is not set
+CONFIG_USB_UHCI_HCD=y
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_AUDIO is not set
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_MIDI is not set
+# CONFIG_USB_ACM is not set
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+
+#
+# USB Human Interface Devices (HID)
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+CONFIG_USB_EGALAX=m
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USB_HPUSBSCSI is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network adaptors
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_TIGL is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+CONFIG_USB_CYTHERM=m
+CONFIG_USB_PHIDGETSERVO=m
+# CONFIG_USB_TEST is not set
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS4_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=y
+# CONFIG_NFSD_V3 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=y
+CONFIG_EXPORTFS=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_4KSTACKS=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+
+#
+# Security options
+#
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_STD_RESOURCES=y
+CONFIG_PC=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
new file mode 100644
index 00000000000..933787a46b4
--- /dev/null
+++ b/arch/i386/kernel/Makefile
@@ -0,0 +1,71 @@
+#
+# Makefile for the linux kernel.
+#
+
+extra-y := head.o init_task.o vmlinux.lds
+
+obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
+		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
+		doublefault.o quirks.o
+
+obj-y				+= cpu/
+obj-y				+= timers/
+obj-$(CONFIG_ACPI_BOOT)		+= acpi/
+obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
+obj-$(CONFIG_MCA)		+= mca.o
+obj-$(CONFIG_X86_MSR)		+= msr.o
+obj-$(CONFIG_X86_CPUID)		+= cpuid.o
+obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_APM)		+= apm.o
+obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
+obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
+obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_MODULES)		+= module.o
+obj-y				+= sysenter.o vsyscall.o
+obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
+obj-$(CONFIG_HPET_TIMER) 	+= time_hpet.o
+obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+
+EXTRA_AFLAGS   := -traditional
+
+obj-$(CONFIG_SCx200)		+= scx200.o
+
+# vsyscall.o contains the vsyscall DSO images as __initdata.
+# We must build both images before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
+targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
+targets += vsyscall.lds
+
+# The DSO images are built using a special linker script.
+quiet_cmd_syscall = SYSCALL $@
+      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
+		          -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
+
+vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1
+SYSCFLAGS_vsyscall-sysenter.so	= $(vsyscall-flags)
+SYSCFLAGS_vsyscall-int80.so	= $(vsyscall-flags)
+
+$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+	$(call if_changed,syscall)
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO.  With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vsyscall-syms.o
+$(obj)/built-in.o: $(obj)/vsyscall-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
+
+SYSCFLAGS_vsyscall-syms.o = -r
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
+	$(call if_changed,syscall)
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
new file mode 100644
index 00000000000..ee75cb286cf
--- /dev/null
+++ b/arch/i386/kernel/acpi/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_ACPI_BOOT)		:= boot.o
+obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
+obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
new file mode 100644
index 00000000000..9ba0b957d11
--- /dev/null
+++ b/arch/i386/kernel/acpi/boot.c
@@ -0,0 +1,908 @@
+/*
+ *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpspec.h>
+
+#ifdef	CONFIG_X86_64
+
+static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
+extern void __init clustered_apic_check(void);
+static inline int ioapic_setup_disabled(void) { return 0; }
+#include <asm/proto.h>
+
+#else	/* X86 */
+
+#ifdef	CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+#endif	/* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) (					    \
+		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX			"ACPI: "
+
+#ifdef CONFIG_ACPI_PCI
+int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+#else
+int acpi_noirq __initdata = 1;
+int acpi_pci_disabled __initdata = 1;
+#endif
+int acpi_ht __initdata = 1;	/* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES	256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+			{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+                              Boot-time Configuration
+   -------------------------------------------------------------------------- */
+
+/*
+ * The default interrupt routing model is PIC (8259).  This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id		acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#ifdef	CONFIG_X86_64
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+	if (!phys_addr || !size)
+	return NULL;
+
+	if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+		return __va(phys_addr);
+
+	return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+	unsigned long base, offset, mapped_size;
+	int idx;
+
+	if (phys + size < 8*1024*1024) 
+		return __va(phys); 
+
+	offset = phys & (PAGE_SIZE - 1);
+	mapped_size = PAGE_SIZE - offset;
+	set_fixmap(FIX_ACPI_END, phys);
+	base = fix_to_virt(FIX_ACPI_END);
+
+	/*
+	 * Most cases can be covered by the below.
+	 */
+	idx = FIX_ACPI_END;
+	while (mapped_size < size) {
+		if (--idx < FIX_ACPI_BEGIN)
+			return NULL;	/* cannot handle this */
+		phys += PAGE_SIZE;
+		set_fixmap(idx, phys);
+		mapped_size += PAGE_SIZE;
+	}
+
+	return ((unsigned char *) base + offset);
+}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_reserved) {
+		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+		return -ENODEV;
+	}
+
+	pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#else
+#define	acpi_parse_mcfg NULL
+#endif /* !CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init
+acpi_parse_madt (
+	unsigned long		phys_addr,
+	unsigned long		size)
+{
+	struct acpi_table_madt	*madt = NULL;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
+	if (!madt) {
+		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+		return -ENODEV;
+	}
+
+	if (madt->lapic_address) {
+		acpi_lapic_addr = (u64) madt->lapic_address;
+
+		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+			madt->lapic_address);
+	}
+
+	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+	
+	return 0;
+}
+
+
+static int __init
+acpi_parse_lapic (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_lapic	*processor = NULL;
+
+	processor = (struct acpi_table_lapic*) header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+
+	/* no utility in registering a disabled processor */
+	if (processor->flags.enabled == 0)
+		return 0;
+
+	x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+	mp_register_lapic (
+		processor->id,					   /* APIC ID */
+		processor->flags.enabled);			  /* Enabled? */
+
+	return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
+
+	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+		return -EINVAL;
+
+	acpi_lapic_addr = lapic_addr_ovr->address;
+
+	return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+	lapic_nmi = (struct acpi_table_lapic_nmi*) header;
+
+	if (BAD_MADT_ENTRY(lapic_nmi, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+
+	if (lapic_nmi->lint != 1)
+		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+	return 0;
+}
+
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+static int __init
+acpi_parse_ioapic (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_ioapic *ioapic = NULL;
+
+	ioapic = (struct acpi_table_ioapic*) header;
+
+	if (BAD_MADT_ENTRY(ioapic, end))
+		return -EINVAL;
+ 
+	acpi_table_print_madt_entry(header);
+
+	mp_register_ioapic (
+		ioapic->id,
+		ioapic->address,
+		ioapic->global_irq_base);
+ 
+	return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void
+acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+	if (trigger == 0)	/* compatible SCI trigger is level */
+		trigger = 3;
+
+	if (polarity == 0)	/* compatible SCI polarity is low */
+		polarity = 3;
+
+	/* Command-line over-ride via acpi_sci= */
+	if (acpi_sci_flags.trigger)
+		trigger = acpi_sci_flags.trigger;
+
+	if (acpi_sci_flags.polarity)
+		polarity = acpi_sci_flags.polarity;
+
+	/*
+ 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+	 * If GSI is < 16, this will update its flags,
+	 * else it will create a new mp_irqs[] entry.
+	 */
+	mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+	/*
+	 * stash over-ride to indicate we've been here
+	 * and for later update of acpi_fadt
+	 */
+	acpi_sci_override_gsi = gsi;
+	return;
+}
+
+static int __init
+acpi_parse_int_src_ovr (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_int_src_ovr *intsrc = NULL;
+
+	intsrc = (struct acpi_table_int_src_ovr*) header;
+
+	if (BAD_MADT_ENTRY(intsrc, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+
+	if (intsrc->bus_irq == acpi_fadt.sci_int) {
+		acpi_sci_ioapic_setup(intsrc->global_irq,
+			intsrc->flags.polarity, intsrc->flags.trigger);
+		return 0;
+	}
+
+	if (acpi_skip_timer_override &&
+		intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+			return 0;
+	}
+
+	mp_override_legacy_irq (
+		intsrc->bus_irq,
+		intsrc->flags.polarity,
+		intsrc->flags.trigger,
+		intsrc->global_irq);
+
+	return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_nmi_src *nmi_src = NULL;
+
+	nmi_src = (struct acpi_table_nmi_src*) header;
+
+	if (BAD_MADT_ENTRY(nmi_src, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+
+	/* TBD: Support nimsrc entries? */
+
+	return 0;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+#ifdef	CONFIG_ACPI_BUS
+
+/*
+ * acpi_pic_sci_set_trigger()
+ * 
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ */
+
+void __init
+acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+	unsigned int mask = 1 << irq;
+	unsigned int old, new;
+
+	/* Real old ELCR mask */
+	old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+	/*
+	 * If we use ACPI to set PCI irq's, then we should clear ELCR
+	 * since we will set it correctly as we enable the PCI irq
+	 * routing.
+	 */
+	new = acpi_noirq ? old : 0;
+
+	/*
+	 * Update SCI information in the ELCR, it isn't in the PCI
+	 * routing tables..
+	 */
+	switch (trigger) {
+	case 1:	/* Edge - clear */
+		new &= ~mask;
+		break;
+	case 3: /* Level - set */
+		new |= mask;
+		break;
+	}
+
+	if (old == new)
+		return;
+
+	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+	outb(new, 0x4d0);
+	outb(new >> 8, 0x4d1);
+}
+
+
+#endif /* CONFIG_ACPI_BUS */
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+	if (use_pci_vector() && !platform_legacy_irq(gsi))
+ 		*irq = IO_APIC_VECTOR(gsi);
+	else
+#endif
+		*irq = gsi;
+	return 0;
+}
+
+unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
+{
+	unsigned int irq;
+	unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_PCI
+	/*
+	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
+	 */
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		extern void eisa_set_level_irq(unsigned int irq);
+
+		if (edge_level == ACPI_LEVEL_SENSITIVE)
+				eisa_set_level_irq(gsi);
+	}
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
+	}
+#endif
+	acpi_gsi_to_irq(plat_gsi, &irq);
+	return irq;
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+
+/*
+ *  ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int
+acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+
+int
+acpi_unmap_lsapic(int cpu)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+static unsigned long __init
+acpi_scan_rsdp (
+	unsigned long		start,
+	unsigned long		length)
+{
+	unsigned long		offset = 0;
+	unsigned long		sig_len = sizeof("RSD PTR ") - 1;
+
+	/*
+	 * Scan all 16-byte boundaries of the physical memory region for the
+	 * RSDP signature.
+	 */
+	for (offset = 0; offset < length; offset += 16) {
+		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+			continue;
+		return (start + offset);
+	}
+
+	return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_sbf *sb;
+
+	if (!phys_addr || !size)
+	return -EINVAL;
+
+	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
+	if (!sb) {
+		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+		return -ENODEV;
+	}
+
+	sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+	return 0;
+}
+
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+	struct acpi_table_hpet *hpet_tbl;
+
+	if (!phys || !size)
+		return -EINVAL;
+
+	hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
+	if (!hpet_tbl) {
+		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+		return -ENODEV;
+	}
+
+	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+		printk(KERN_WARNING PREFIX "HPET timers must be located in "
+		       "memory.\n");
+		return -1;
+	}
+
+#ifdef	CONFIG_X86_64
+        vxtime.hpet_address = hpet_tbl->addr.addrl |
+                ((long) hpet_tbl->addr.addrh << 32);
+
+        printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+               hpet_tbl->id, vxtime.hpet_address);
+#else	/* X86 */
+	{
+		extern unsigned long hpet_address;
+
+		hpet_address = hpet_tbl->addr.addrl;
+		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+			hpet_tbl->id, hpet_address);
+	}
+#endif	/* X86 */
+
+	return 0;
+}
+#else
+#define	acpi_parse_hpet	NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+	struct fadt_descriptor_rev2 *fadt = NULL;
+
+	fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
+	if(!fadt) {
+		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+		return 0;
+	}
+
+#ifdef	CONFIG_ACPI_INTERPRETER
+	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
+	acpi_fadt.sci_int = fadt->sci_int;
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+	/* detect the location of the ACPI PM Timer */
+	if (fadt->revision >= FADT2_REVISION_ID) {
+		/* FADT rev. 2 */
+		if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
+			return 0;
+
+		pmtmr_ioport = fadt->xpm_tmr_blk.address;
+	} else {
+		/* FADT rev. 1 */
+		pmtmr_ioport = fadt->V1_pm_tmr_blk;
+	}
+	if (pmtmr_ioport)
+		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
+#endif
+	return 0;
+}
+
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+	unsigned long		rsdp_phys = 0;
+
+	if (efi_enabled) {
+		if (efi.acpi20)
+			return __pa(efi.acpi20);
+		else if (efi.acpi)
+			return __pa(efi.acpi);
+	}
+	/*
+	 * Scan memory looking for the RSDP signature. First search EBDA (low
+	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
+	 */
+	rsdp_phys = acpi_scan_rsdp (0, 0x400);
+	if (!rsdp_phys)
+		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);
+
+	return rsdp_phys;
+}
+
+#ifdef	CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_lapic_entries(void)
+{
+	int count;
+
+	/* 
+	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
+	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+	 */
+
+	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
+	if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+		return count;
+	}
+
+	mp_register_lapic_address(acpi_lapic_addr);
+
+	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+				       MAX_APICS);
+	if (!count) { 
+		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+		/* TBD: Cleanup to allow fallback to MPS */
+		return -ENODEV;
+	}
+	else if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+		/* TBD: Cleanup to allow fallback to MPS */
+		return count;
+	}
+
+	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+	if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+		/* TBD: Cleanup to allow fallback to MPS */
+		return count;
+	}
+	return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_ioapic_entries(void)
+{
+	int count;
+
+	/*
+	 * ACPI interpreter is required to complete interrupt setup,
+	 * so if it is off, don't enumerate the io-apics with ACPI.
+	 * If MPS is present, it will handle them,
+	 * otherwise the system will stay in PIC mode
+	 */
+	if (acpi_disabled || acpi_noirq) {
+		return -ENODEV;
+        }
+
+	/*
+ 	 * if "noapic" boot option, don't look for IO-APICs
+	 */
+	if (skip_ioapic_setup) {
+		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+			"due to 'noapic' option.\n");
+		return -ENODEV;
+	}
+
+	count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
+	if (!count) {
+		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+		return -ENODEV;
+	}
+	else if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+		return count;
+	}
+
+	count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
+	if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+		/* TBD: Cleanup to allow fallback to MPS */
+		return count;
+	}
+
+	/*
+	 * If BIOS did not supply an INT_SRC_OVR for the SCI
+	 * pretend we got one so we can set the SCI flags.
+	 */
+	if (!acpi_sci_override_gsi)
+		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+	/* Fill in identity legacy mapings where no override */
+	mp_config_acpi_legacy_irqs();
+
+	count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
+	if (count < 0) {
+		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+		/* TBD: Cleanup to allow fallback to MPS */
+		return count;
+	}
+
+	return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+	return -1;
+}
+#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
+
+
+static void __init
+acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	int count, error;
+
+	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+	if (count >= 1) {
+
+		/*
+		 * Parse MADT LAPIC entries
+		 */
+		error = acpi_parse_madt_lapic_entries();
+		if (!error) {
+			acpi_lapic = 1;
+
+			/*
+			 * Parse MADT IO-APIC entries
+			 */
+			error = acpi_parse_madt_ioapic_entries();
+			if (!error) {
+				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+				acpi_irq_balance_set(NULL);
+				acpi_ioapic = 1;
+
+				smp_found_config = 1;
+				clustered_apic_check();
+			}
+		}
+		if (error == -EINVAL) {
+			/*
+			 * Dell Precision Workstation 410, 610 come here.
+			 */
+			printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
+			disable_acpi();
+		}
+	}
+#endif
+	return;
+}
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ *  called from setup_arch(), always.
+ *	1. checksums all tables
+ *	2. enumerates lapics
+ *	3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ *	acpi_lapic = 1 if LAPIC found
+ *	acpi_ioapic = 1 if IOAPIC found
+ *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ *	if acpi_blacklisted() acpi_disabled = 1;
+ *	acpi_irq_model=...
+ *	...
+ *
+ * return value: (currently ignored)
+ *	0: success
+ *	!0: failure
+ */
+
+int __init
+acpi_boot_table_init(void)
+{
+	int error;
+
+	/*
+	 * If acpi_disabled, bail out
+	 * One exception: acpi=ht continues far enough to enumerate LAPICs
+	 */
+	if (acpi_disabled && !acpi_ht)
+		 return 1;
+
+	/* 
+	 * Initialize the ACPI boot-time table parser.
+	 */
+	error = acpi_table_init();
+	if (error) {
+		disable_acpi();
+		return error;
+	}
+
+#ifdef __i386__
+	check_acpi_pci();
+#endif
+
+	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+	/*
+	 * blacklist may disable ACPI entirely
+	 */
+	error = acpi_blacklisted();
+	if (error) {
+		extern int acpi_force;
+
+		if (acpi_force) {
+			printk(KERN_WARNING PREFIX "acpi=force override\n");
+		} else {
+			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+			disable_acpi();
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+
+int __init acpi_boot_init(void)
+{
+	/*
+	 * If acpi_disabled, bail out
+	 * One exception: acpi=ht continues far enough to enumerate LAPICs
+	 */
+	if (acpi_disabled && !acpi_ht)
+		 return 1;
+
+	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+	/*
+	 * set sci_int and PM timer address
+	 */
+	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+	/*
+	 * Process the Multiple APIC Description Table (MADT), if present
+	 */
+	acpi_process_madt();
+
+	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+
+	return 0;
+}
+
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
new file mode 100644
index 00000000000..726a5ca4b16
--- /dev/null
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -0,0 +1,51 @@
+/* 
+ * Do early PCI probing for bug detection when the main PCI subsystem is 
+ * not up yet.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci-direct.h>
+#include <asm/acpi.h>
+
+static int __init check_bridge(int vendor, int device) 
+{
+	/* According to Nvidia all timer overrides are bogus. Just ignore
+	   them all. */
+	if (vendor == PCI_VENDOR_ID_NVIDIA) { 
+		acpi_skip_timer_override = 1; 		
+	}
+	return 0;
+}
+   
+void __init check_acpi_pci(void) 
+{ 
+	int num,slot,func; 
+
+	/* Assume the machine supports type 1. If not it will 
+	   always read ffffffff and should not have any side effect. */
+
+	/* Poor man's PCI discovery */
+	for (num = 0; num < 32; num++) { 
+		for (slot = 0; slot < 32; slot++) { 
+			for (func = 0; func < 8; func++) { 
+				u32 class;
+				u32 vendor;
+				class = read_pci_config(num,slot,func,
+							PCI_CLASS_REVISION);
+				if (class == 0xffffffff)
+					break; 
+
+				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+					continue; 
+				
+				vendor = read_pci_config(num, slot, func, 
+							 PCI_VENDOR_ID);
+				
+				if (check_bridge(vendor&0xffff, vendor >> 16))
+					return; 
+			} 
+			
+		}
+	}
+}
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
new file mode 100644
index 00000000000..28bb0514bb6
--- /dev/null
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -0,0 +1,93 @@
+/*
+ * sleep.c - x86-specific ACPI sleep support.
+ *
+ *  Copyright (C) 2001-2003 Patrick Mochel
+ *  Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
+ */
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <asm/smp.h>
+#include <asm/tlbflush.h>
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_video_flags;
+extern char wakeup_start, wakeup_end;
+
+extern void zap_low_mappings(void);
+
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+static void init_low_mapping(pgd_t *pgd, int pgd_limit)
+{
+	int pgd_ofs = 0;
+
+	while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
+		set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD));
+		pgd_ofs++, pgd++;
+	}
+	flush_tlb_all();
+}
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem (void)
+{
+	if (!acpi_wakeup_address)
+		return 1;
+	init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
+	memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
+	acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+	return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem (void)
+{
+	zap_low_mappings();
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
+		printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n");
+		return;
+	}
+
+	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
+	if (!acpi_wakeup_address)
+		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+}
+
+static int __init acpi_sleep_setup(char *str)
+{
+	while ((str != NULL) && (*str != '\0')) {
+		if (strncmp(str, "s3_bios", 7) == 0)
+			acpi_video_flags = 1;
+		if (strncmp(str, "s3_mode", 7) == 0)
+			acpi_video_flags |= 2;
+		str = strchr(str, ',');
+		if (str != NULL)
+			str += strspn(str, ", \t");
+	}
+	return 1;
+}
+
+
+__setup("acpi_sleep=", acpi_sleep_setup);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
new file mode 100644
index 00000000000..39d32484f6f
--- /dev/null
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -0,0 +1,318 @@
+.text
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+
+#
+# wakeup_code runs in real mode, and at unknown address (determined at run-time).
+# Therefore it must only use relative jumps/calls. 
+#
+# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
+#
+# If physical address of wakeup_code is 0x12345, BIOS should call us with
+# cs = 0x1234, eip = 0x05
+# 
+
+ALIGN
+	.align	4096
+ENTRY(wakeup_start)
+wakeup_code:
+	wakeup_code_start = .
+	.code16
+
+ 	movw	$0xb800, %ax
+	movw	%ax,%fs
+	movw	$0x0e00 + 'L', %fs:(0x10)
+
+	cli
+	cld
+
+	# setup data segment
+	movw	%cs, %ax
+	movw	%ax, %ds					# Make ds:0 point to wakeup_start
+	movw	%ax, %ss
+	mov	$(wakeup_stack - wakeup_code), %sp		# Private stack is needed for ASUS board
+	movw	$0x0e00 + 'S', %fs:(0x12)
+
+	pushl	$0						# Kill any dangerous flags
+	popfl
+
+	movl	real_magic - wakeup_code, %eax
+	cmpl	$0x12345678, %eax
+	jne	bogus_real_magic
+
+	testl	$1, video_flags - wakeup_code
+	jz	1f
+	lcall   $0xc000,$3
+	movw	%cs, %ax
+	movw	%ax, %ds					# Bios might have played with that
+	movw	%ax, %ss
+1:
+
+	testl	$2, video_flags - wakeup_code
+	jz	1f
+	mov	video_mode - wakeup_code, %ax
+	call	mode_set
+1:
+
+	# set up page table
+	movl	$swapper_pg_dir-__PAGE_OFFSET, %eax
+	movl	%eax, %cr3
+
+	testl	$1, real_efer_save_restore - wakeup_code
+	jz	4f
+	# restore efer setting
+	movl	real_save_efer_edx - wakeup_code, %edx
+	movl	real_save_efer_eax - wakeup_code, %eax
+	mov     $0xc0000080, %ecx
+	wrmsr
+4:
+	# make sure %cr4 is set correctly (features, etc)
+	movl	real_save_cr4 - wakeup_code, %eax
+	movl	%eax, %cr4
+	movw	$0xb800, %ax
+	movw	%ax,%fs
+	movw	$0x0e00 + 'i', %fs:(0x12)
+	
+	# need a gdt
+	lgdt	real_save_gdt - wakeup_code
+
+	movl	real_save_cr0 - wakeup_code, %eax
+	movl	%eax, %cr0
+	jmp 1f
+1:
+	movw	$0x0e00 + 'n', %fs:(0x14)
+
+	movl	real_magic - wakeup_code, %eax
+	cmpl	$0x12345678, %eax
+	jne	bogus_real_magic
+
+	ljmpl	$__KERNEL_CS,$wakeup_pmode_return
+
+real_save_gdt:	.word 0
+		.long 0
+real_save_cr0:	.long 0
+real_save_cr3:	.long 0
+real_save_cr4:	.long 0
+real_magic:	.long 0
+video_mode:	.long 0
+video_flags:	.long 0
+real_efer_save_restore:	.long 0
+real_save_efer_edx: 	.long 0
+real_save_efer_eax: 	.long 0
+
+bogus_real_magic:
+	movw	$0x0e00 + 'B', %fs:(0x12)
+	jmp bogus_real_magic
+
+/* This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *	NORMAL_VGA (-1)
+ *	EXTENDED_VGA (-2)
+ *	ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+# Setting of user mode (AX=mode ID) => CF=success
+mode_set:
+	movw	%ax, %bx
+#if 0
+	cmpb	$0xff, %ah
+	jz	setalias
+
+	testb	$VIDEO_RECALC>>8, %ah
+	jnz	_setrec
+
+	cmpb	$VIDEO_FIRST_RESOLUTION>>8, %ah
+	jnc	setres
+	
+	cmpb	$VIDEO_FIRST_SPECIAL>>8, %ah
+	jz	setspc
+
+	cmpb	$VIDEO_FIRST_V7>>8, %ah
+	jz	setv7
+#endif
+	
+	cmpb	$VIDEO_FIRST_VESA>>8, %ah
+	jnc	check_vesa
+#if 0	
+	orb	%ah, %ah
+	jz	setmenu
+#endif
+	
+	decb	%ah
+#	jz	setbios				  Add bios modes later
+
+setbad:	clc
+	ret
+
+check_vesa:
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	orw	$0x4000, %bx			# Use linear frame buffer
+	movw	$0x4f02, %ax			# VESA BIOS mode set call
+	int	$0x10
+	cmpw	$0x004f, %ax			# AL=4f if implemented
+	jnz	_setbad				# AH=0 if OK
+
+	stc
+	ret
+
+_setbad: jmp setbad
+
+	.code32
+	ALIGN
+
+.org	0x800
+wakeup_stack_begin:	# Stack grows down
+
+.org	0xff0		# Just below end of page
+wakeup_stack:
+ENTRY(wakeup_end)
+	
+.org	0x1000
+
+wakeup_pmode_return:
+	movw	$__KERNEL_DS, %ax
+	movw	%ax, %ss
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %gs
+	movw	$0x0e00 + 'u', 0xb8016
+
+	# reload the gdt, as we need the full 32 bit address
+	lgdt	saved_gdt
+	lidt	saved_idt
+	lldt	saved_ldt
+	ljmp	$(__KERNEL_CS),$1f
+1:
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+	wbinvd
+
+	# and restore the stack ... but you need gdt for this to work
+	movl	saved_context_esp, %esp
+
+	movl	%cs:saved_magic, %eax
+	cmpl	$0x12345678, %eax
+	jne	bogus_magic
+
+	# jump to place where we left off
+	movl	saved_eip,%eax
+	jmp	*%eax
+
+bogus_magic:
+	movw	$0x0e00 + 'B', 0xb8018
+	jmp	bogus_magic
+
+
+##
+# acpi_copy_wakeup_routine
+#
+# Copy the above routine to low memory.
+#
+# Parameters:
+# %eax:	place to copy wakeup routine to
+#
+# Returned address is location of code in low memory (past data and stack)
+#
+ENTRY(acpi_copy_wakeup_routine)
+
+	sgdt	saved_gdt
+	sidt	saved_idt
+	sldt	saved_ldt
+	str	saved_tss
+
+	movl	nx_enabled, %edx
+	movl	%edx, real_efer_save_restore - wakeup_start (%eax)
+	testl	$1, real_efer_save_restore - wakeup_start (%eax)
+	jz	2f
+	# save efer setting
+	pushl	%eax
+	movl	%eax, %ebx
+	mov     $0xc0000080, %ecx
+	rdmsr
+	movl	%edx, real_save_efer_edx - wakeup_start (%ebx)
+	movl	%eax, real_save_efer_eax - wakeup_start (%ebx)
+	popl	%eax
+2:
+
+	movl    %cr3, %edx
+	movl    %edx, real_save_cr3 - wakeup_start (%eax)
+	movl    %cr4, %edx
+	movl    %edx, real_save_cr4 - wakeup_start (%eax)
+	movl	%cr0, %edx
+	movl	%edx, real_save_cr0 - wakeup_start (%eax)
+	sgdt    real_save_gdt - wakeup_start (%eax)
+
+	movl	saved_videomode, %edx
+	movl	%edx, video_mode - wakeup_start (%eax)
+	movl	acpi_video_flags, %edx
+	movl	%edx, video_flags - wakeup_start (%eax)
+	movl	$0x12345678, real_magic - wakeup_start (%eax)
+	movl	$0x12345678, saved_magic
+	ret
+
+.data
+ALIGN
+ENTRY(saved_magic)	.long	0
+ENTRY(saved_eip)	.long	0
+
+save_registers:
+	leal	4(%esp), %eax
+	movl	%eax, saved_context_esp
+	movl %ebx, saved_context_ebx
+	movl %ebp, saved_context_ebp
+	movl %esi, saved_context_esi
+	movl %edi, saved_context_edi
+	pushfl ; popl saved_context_eflags
+
+	movl $ret_point, saved_eip
+	ret
+
+
+restore_registers:
+	movl saved_context_ebp, %ebp
+	movl saved_context_ebx, %ebx
+	movl saved_context_esi, %esi
+	movl saved_context_edi, %edi
+	pushl saved_context_eflags ; popfl
+	ret	
+
+ENTRY(do_suspend_lowlevel)
+	call	save_processor_state
+	call	save_registers
+	pushl	$3
+	call	acpi_enter_sleep_state
+	addl	$4, %esp
+	ret
+	.p2align 4,,7
+ret_point:
+	call	restore_registers
+	call	restore_processor_state
+	ret
+
+ENTRY(do_suspend_lowlevel_s4bios)
+	call save_processor_state
+	call save_registers
+	call acpi_enter_sleep_state_s4bios
+	ret
+
+ALIGN
+# saved registers
+saved_gdt:	.long	0,0
+saved_idt:	.long	0,0
+saved_ldt:	.long	0
+saved_tss:	.long	0
+
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
new file mode 100644
index 00000000000..35c1751ea0b
--- /dev/null
+++ b/arch/i386/kernel/apic.c
@@ -0,0 +1,1278 @@
+/*
+ *	Local APIC handling, local APIC timers
+ *
+ *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively.
+ *	Maciej W. Rozycki	:	Various updates and fixes.
+ *	Mikael Pettersson	:	Power Management for UP-APIC.
+ *	Pavel Machek and
+ *	Mikael Pettersson	:	PM converted to driver model.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+
+static void apic_pm_activate(void);
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk("unexpected IRQ trap at vector %02x\n", irq);
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 */
+	ack_APIC_irq();
+}
+
+void __init apic_intr_init(void)
+{
+#ifdef CONFIG_SMP
+	smp_intr_init();
+#endif
+	/* self generated IPI for local APIC timer */
+	set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+	/* IPI vectors for APIC spurious and error interrupts */
+	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+	/* thermal monitor LVT interrupt */
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+}
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+static DEFINE_PER_CPU(int, prof_multiplier) = 1;
+static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
+static DEFINE_PER_CPU(int, prof_counter) = 1;
+
+static int enabled_via_apicbase;
+
+void enable_NMI_through_LVT0 (void * dummy)
+{
+	unsigned int v, ver;
+
+	ver = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(ver);
+	v = APIC_DM_NMI;			/* unmask and set to NMI */
+	if (!APIC_INTEGRATED(ver))		/* 82489DX */
+		v |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write_around(APIC_LVT0, v);
+}
+
+int get_physical_broadcast(void)
+{
+	unsigned int lvr, version;
+	lvr = apic_read(APIC_LVR);
+	version = GET_APIC_VERSION(lvr);
+	if (!APIC_INTEGRATED(version) || version >= 0x14)
+		return 0xff;
+	else
+		return 0xf;
+}
+
+int get_maxlvt(void)
+{
+	unsigned int v, ver, maxlvt;
+
+	v = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(v);
+	/* 82489DXs do not report # of LVT entries. */
+	maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+	return maxlvt;
+}
+
+void clear_local_APIC(void)
+{
+	int maxlvt;
+	unsigned long v;
+
+	maxlvt = get_maxlvt();
+
+	/*
+	 * Masking an LVT entry on a P6 can trigger a local APIC error
+	 * if the vector is zero. Mask LVTERR first to prevent this.
+	 */
+	if (maxlvt >= 3) {
+		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+		apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+	}
+	/*
+	 * Careful: we have to set masks only first to deassert
+	 * any level-triggered sources.
+	 */
+	v = apic_read(APIC_LVTT);
+	apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT0);
+	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT1);
+	apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+	if (maxlvt >= 4) {
+		v = apic_read(APIC_LVTPC);
+		apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+	}
+
+/* lets not touch this if we didn't frob it */
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	if (maxlvt >= 5) {
+		v = apic_read(APIC_LVTTHMR);
+		apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+	}
+#endif
+	/*
+	 * Clean APIC state for other OSs:
+	 */
+	apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+	if (maxlvt >= 3)
+		apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+	if (maxlvt >= 4)
+		apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	if (maxlvt >= 5)
+		apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
+#endif
+	v = GET_APIC_VERSION(apic_read(APIC_LVR));
+	if (APIC_INTEGRATED(v)) {	/* !82489DX */
+		if (maxlvt > 3)		/* Due to Pentium errata 3AP and 11AP. */
+			apic_write(APIC_ESR, 0);
+		apic_read(APIC_ESR);
+	}
+}
+
+void __init connect_bsp_APIC(void)
+{
+	if (pic_mode) {
+		/*
+		 * Do not trust the local APIC being empty at bootup.
+		 */
+		clear_local_APIC();
+		/*
+		 * PIC mode, enable APIC mode in the IMCR, i.e.
+		 * connect BSP's local APIC to INT and NMI lines.
+		 */
+		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+				"enabling APIC mode.\n");
+		outb(0x70, 0x22);
+		outb(0x01, 0x23);
+	}
+	enable_apic_mode();
+}
+
+void disconnect_bsp_APIC(void)
+{
+	if (pic_mode) {
+		/*
+		 * Put the board back into PIC mode (has an effect
+		 * only on certain older boards).  Note that APIC
+		 * interrupts, including IPIs, won't work beyond
+		 * this point!  The only exception are INIT IPIs.
+		 */
+		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+				"entering PIC mode.\n");
+		outb(0x70, 0x22);
+		outb(0x00, 0x23);
+	}
+}
+
+void disable_local_APIC(void)
+{
+	unsigned long value;
+
+	clear_local_APIC();
+
+	/*
+	 * Disable APIC (implies clearing of registers
+	 * for 82489DX!).
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_SPIV_APIC_ENABLED;
+	apic_write_around(APIC_SPIV, value);
+
+	if (enabled_via_apicbase) {
+		unsigned int l, h;
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_ENABLE;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	}
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+	unsigned int reg0, reg1;
+
+	/*
+	 * The version register is read-only in a real APIC.
+	 */
+	reg0 = apic_read(APIC_LVR);
+	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
+	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+	reg1 = apic_read(APIC_LVR);
+	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
+
+	/*
+	 * The two version reads above should print the same
+	 * numbers.  If the second one is different, then we
+	 * poke at a non-APIC.
+	 */
+	if (reg1 != reg0)
+		return 0;
+
+	/*
+	 * Check if the version looks reasonably.
+	 */
+	reg1 = GET_APIC_VERSION(reg0);
+	if (reg1 == 0x00 || reg1 == 0xff)
+		return 0;
+	reg1 = get_maxlvt();
+	if (reg1 < 0x02 || reg1 == 0xff)
+		return 0;
+
+	/*
+	 * The ID register is read/write in a real APIC.
+	 */
+	reg0 = apic_read(APIC_ID);
+	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
+
+	/*
+	 * The next two are just to see if we have sane values.
+	 * They're only really relevant if we're in Virtual Wire
+	 * compatibility mode, but most boxes are anymore.
+	 */
+	reg0 = apic_read(APIC_LVT0);
+	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
+	reg1 = apic_read(APIC_LVT1);
+	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
+
+	return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+	/* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
+	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+	if (ver >= 0x14)	/* P4 or higher */
+		return;
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+
+	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
+	apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+				| APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned long value, ver;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !cpu_has_apic)
+		return;
+
+	value = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(value);
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+	
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write_around(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!APIC_INTEGRATED(ver))		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write_around(APIC_LVT1, value);
+}
+
+void __init setup_local_APIC (void)
+{
+	unsigned long oldvalue, value, ver, maxlvt;
+
+	/* Pound the ESR really hard over the head with a big hammer - mbligh */
+	if (esr_disable) {
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+	}
+
+	value = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(value);
+
+	if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+		__error_in_apic_c();
+
+	/*
+	 * Double-check whether this APIC is really registered.
+	 */
+	if (!apic_id_registered())
+		BUG();
+
+	/*
+	 * Intel recommends to set DFR, LDR and TPR before enabling
+	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+	 * document number 292116).  So here it goes...
+	 */
+	init_apic_ldr();
+
+	/*
+	 * Set Task Priority to 'accept all'. We never change this
+	 * later on.
+	 */
+	value = apic_read(APIC_TASKPRI);
+	value &= ~APIC_TPRI_MASK;
+	apic_write_around(APIC_TASKPRI, value);
+
+	/*
+	 * Now that we are all set up, enable the APIC
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	/*
+	 * Enable APIC
+	 */
+	value |= APIC_SPIV_APIC_ENABLED;
+
+	/*
+	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+	 * certain networking cards. If high frequency interrupts are
+	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
+	 * entry is masked/unmasked at a high rate as well then sooner or
+	 * later IOAPIC line gets 'stuck', no more interrupts are received
+	 * from the device. If focus CPU is disabled then the hang goes
+	 * away, oh well :-(
+	 *
+	 * [ This bug can be reproduced easily with a level-triggered
+	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
+	 *   BX chipset. ]
+	 */
+	/*
+	 * Actually disabling the focus CPU check just makes the hang less
+	 * frequent as it makes the interrupt distributon model be more
+	 * like LRU than MRU (the short-term load is more even across CPUs).
+	 * See also the comment in end_level_ioapic_irq().  --macro
+	 */
+#if 1
+	/* Enable focus processor (bit==0) */
+	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#else
+	/* Disable focus processor (bit==1) */
+	value |= APIC_SPIV_FOCUS_DISABLED;
+#endif
+	/*
+	 * Set spurious IRQ vector
+	 */
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write_around(APIC_SPIV, value);
+
+	/*
+	 * Set up LVT0, LVT1:
+	 *
+	 * set up through-local-APIC on the BP's LINT0. This is not
+	 * strictly necessery in pure symmetric-IO mode, but sometimes
+	 * we delegate interrupts to the 8259A.
+	 */
+	/*
+	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+	 */
+	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+	if (!smp_processor_id() && (pic_mode || !value)) {
+		value = APIC_DM_EXTINT;
+		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
+				smp_processor_id());
+	} else {
+		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
+				smp_processor_id());
+	}
+	apic_write_around(APIC_LVT0, value);
+
+	/*
+	 * only the BP should see the LINT1 NMI signal, obviously.
+	 */
+	if (!smp_processor_id())
+		value = APIC_DM_NMI;
+	else
+		value = APIC_DM_NMI | APIC_LVT_MASKED;
+	if (!APIC_INTEGRATED(ver))		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write_around(APIC_LVT1, value);
+
+	if (APIC_INTEGRATED(ver) && !esr_disable) {		/* !82489DX */
+		maxlvt = get_maxlvt();
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+		oldvalue = apic_read(APIC_ESR);
+
+		value = ERROR_APIC_VECTOR;      // enables sending errors
+		apic_write_around(APIC_LVTERR, value);
+		/*
+		 * spec says clear errors after enabling vector.
+		 */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+		value = apic_read(APIC_ESR);
+		if (value != oldvalue)
+			apic_printk(APIC_VERBOSE, "ESR value before enabling "
+				"vector: 0x%08lx  after: 0x%08lx\n",
+				oldvalue, value);
+	} else {
+		if (esr_disable)	
+			/* 
+			 * Something untraceble is creating bad interrupts on 
+			 * secondary quads ... for the moment, just leave the
+			 * ESR disabled - we can't do anything useful with the
+			 * errors anyway - mbligh
+			 */
+			printk("Leaving ESR disabled.\n");
+		else 
+			printk("No ESR for 82489DX.\n");
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		setup_apic_nmi_watchdog();
+	apic_pm_activate();
+}
+
+/*
+ * If Linux enabled the LAPIC against the BIOS default
+ * disable it down before re-entering the BIOS on shutdown.
+ * Otherwise the BIOS may get confused and not power-off.
+ */
+void lapic_shutdown(void)
+{
+	if (!cpu_has_apic || !enabled_via_apicbase)
+		return;
+
+	local_irq_disable();
+	disable_local_APIC();
+	local_irq_enable();
+}
+
+#ifdef CONFIG_PM
+
+static struct {
+	int active;
+	/* r/w apic fields */
+	unsigned int apic_id;
+	unsigned int apic_taskpri;
+	unsigned int apic_ldr;
+	unsigned int apic_dfr;
+	unsigned int apic_spiv;
+	unsigned int apic_lvtt;
+	unsigned int apic_lvtpc;
+	unsigned int apic_lvt0;
+	unsigned int apic_lvt1;
+	unsigned int apic_lvterr;
+	unsigned int apic_tmict;
+	unsigned int apic_tdcr;
+	unsigned int apic_thmr;
+} apic_pm_state;
+
+static int lapic_suspend(struct sys_device *dev, u32 state)
+{
+	unsigned long flags;
+
+	if (!apic_pm_state.active)
+		return 0;
+
+	apic_pm_state.apic_id = apic_read(APIC_ID);
+	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+	apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+	apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+	
+	local_irq_save(flags);
+	disable_local_APIC();
+	local_irq_restore(flags);
+	return 0;
+}
+
+static int lapic_resume(struct sys_device *dev)
+{
+	unsigned int l, h;
+	unsigned long flags;
+
+	if (!apic_pm_state.active)
+		return 0;
+
+	local_irq_save(flags);
+
+	/*
+	 * Make sure the APICBASE points to the right address
+	 *
+	 * FIXME! This will be wrong if we ever support suspend on
+	 * SMP! We'll need to do this as part of the CPU restore!
+	 */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	l &= ~MSR_IA32_APICBASE_BASE;
+	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+	wrmsr(MSR_IA32_APICBASE, l, h);
+
+	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+	apic_write(APIC_ID, apic_pm_state.apic_id);
+	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+	apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+	apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+	local_irq_restore(flags);
+	return 0;
+}
+
+/*
+ * This device has no shutdown method - fully functioning local APICs
+ * are needed on every CPU up until machine_halt/restart/poweroff.
+ */
+
+static struct sysdev_class lapic_sysclass = {
+	set_kset_name("lapic"),
+	.resume		= lapic_resume,
+	.suspend	= lapic_suspend,
+};
+
+static struct sys_device device_lapic = {
+	.id	= 0,
+	.cls	= &lapic_sysclass,
+};
+
+static void __init apic_pm_activate(void)
+{
+	apic_pm_state.active = 1;
+}
+
+static int __init init_lapic_sysfs(void)
+{
+	int error;
+
+	if (!cpu_has_apic)
+		return 0;
+	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
+	error = sysdev_class_register(&lapic_sysclass);
+	if (!error)
+		error = sysdev_register(&device_lapic);
+	return error;
+}
+device_initcall(init_lapic_sysfs);
+
+#else	/* CONFIG_PM */
+
+static void apic_pm_activate(void) { }
+
+#endif	/* CONFIG_PM */
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+static int __init lapic_disable(char *str)
+{
+	enable_local_apic = -1;
+	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	return 0;
+}
+__setup("nolapic", lapic_disable);
+
+static int __init lapic_enable(char *str)
+{
+	enable_local_apic = 1;
+	return 0;
+}
+__setup("lapic", lapic_enable);
+
+static int __init apic_set_verbosity(char *str)
+{
+	if (strcmp("debug", str) == 0)
+		apic_verbosity = APIC_DEBUG;
+	else if (strcmp("verbose", str) == 0)
+		apic_verbosity = APIC_VERBOSE;
+	else
+		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+				" use apic=verbose or apic=debug", str);
+
+	return 0;
+}
+
+__setup("apic=", apic_set_verbosity);
+
+static int __init detect_init_APIC (void)
+{
+	u32 h, l, features;
+	extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+	/* Disabled by kernel option? */
+	if (enable_local_apic < 0)
+		return -1;
+
+	/* Workaround for us being called before identify_cpu(). */
+	get_cpu_vendor(&boot_cpu_data);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+		    (boot_cpu_data.x86 == 15))	    
+			break;
+		goto no_apic;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+			break;
+		goto no_apic;
+	default:
+		goto no_apic;
+	}
+
+	if (!cpu_has_apic) {
+		/*
+		 * Over-ride BIOS and try to enable the local
+		 * APIC only if "lapic" specified.
+		 */
+		if (enable_local_apic <= 0) {
+			printk("Local APIC disabled by BIOS -- "
+			       "you can enable it with \"lapic\"\n");
+			return -1;
+		}
+		/*
+		 * Some BIOSes disable the local APIC in the
+		 * APIC_BASE MSR. This can only be done in
+		 * software for Intel P6 or later and AMD K7
+		 * (Model > 1) or later.
+		 */
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			printk("Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
+	}
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		printk("Could not enable APIC!\n");
+		return -1;
+	}
+	set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	if (nmi_watchdog != NMI_NONE)
+		nmi_watchdog = NMI_LOCAL_APIC;
+
+	printk("Found and enabled local APIC!\n");
+
+	apic_pm_activate();
+
+	return 0;
+
+no_apic:
+	printk("No local APIC present or hardware disabled\n");
+	return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+	unsigned long apic_phys;
+
+	/*
+	 * If no local APIC can be found then set up a fake all
+	 * zeroes page to simulate the local APIC and another
+	 * one for the IO-APIC.
+	 */
+	if (!smp_found_config && detect_init_APIC()) {
+		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+		apic_phys = __pa(apic_phys);
+	} else
+		apic_phys = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
+	       apic_phys);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+	{
+		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+		int i;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			if (smp_found_config) {
+				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+				if (!ioapic_phys) {
+					printk(KERN_ERR
+					       "WARNING: bogus zero IO-APIC "
+					       "address found in MPTABLE, "
+					       "disabling IO/APIC support!\n");
+					smp_found_config = 0;
+					skip_ioapic_setup = 1;
+					goto fake_ioapic_page;
+				}
+			} else {
+fake_ioapic_page:
+				ioapic_phys = (unsigned long)
+					      alloc_bootmem_pages(PAGE_SIZE);
+				ioapic_phys = __pa(ioapic_phys);
+			}
+			set_fixmap_nocache(idx, ioapic_phys);
+			printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
+			       __fix_to_virt(idx), ioapic_phys);
+			idx++;
+		}
+	}
+#endif
+}
+
+/*
+ * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
+ * per second. We assume that the caller has already set up the local
+ * APIC.
+ *
+ * The APIC timer is not exactly sync with the external timer chip, it
+ * closely follows bus clocks.
+ */
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+	extern spinlock_t i8253_lock;
+	unsigned long flags;
+
+	unsigned int count;
+
+	spin_lock_irqsave(&i8253_lock, flags);
+
+	outb_p(0x00, PIT_MODE);
+	count = inb_p(PIT_CH0);
+	count |= inb_p(PIT_CH0) << 8;
+
+	spin_unlock_irqrestore(&i8253_lock, flags);
+
+	return count;
+}
+
+/* next tick in 8254 can be caught by catching timer wraparound */
+static void __init wait_8254_wraparound(void)
+{
+	unsigned int curr_count, prev_count;
+
+	curr_count = get_8254_timer_count();
+	do {
+		prev_count = curr_count;
+		curr_count = get_8254_timer_count();
+
+		/* workaround for broken Mercury/Neptune */
+		if (prev_count >= curr_count + 0x100)
+			curr_count = get_8254_timer_count();
+
+	} while (prev_count >= curr_count);
+}
+
+/*
+ * Default initialization for 8254 timers. If we use other timers like HPET,
+ * we override this later
+ */
+void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+
+#define APIC_DIVISOR 16
+
+static void __setup_APIC_LVTT(unsigned int clocks)
+{
+	unsigned int lvtt_value, tmp_value, ver;
+
+	ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+	lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+	if (!APIC_INTEGRATED(ver))
+		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+	apic_write_around(APIC_LVTT, lvtt_value);
+
+	/*
+	 * Divide PICLK by 16
+	 */
+	tmp_value = apic_read(APIC_TDCR);
+	apic_write_around(APIC_TDCR, (tmp_value
+				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+				| APIC_TDR_DIV_16);
+
+	apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+static void __init setup_APIC_timer(unsigned int clocks)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	/*
+	 * Wait for IRQ0's slice:
+	 */
+	wait_timer_tick();
+
+	__setup_APIC_LVTT(clocks);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+static int __init calibrate_APIC_clock(void)
+{
+	unsigned long long t1 = 0, t2 = 0;
+	long tt1, tt2;
+	long result;
+	int i;
+	const int LOOPS = HZ/10;
+
+	apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
+
+	/*
+	 * Put whatever arbitrary (but long enough) timeout
+	 * value into the APIC clock, we just want to get the
+	 * counter running for calibration.
+	 */
+	__setup_APIC_LVTT(1000000000);
+
+	/*
+	 * The timer chip counts down to zero. Let's wait
+	 * for a wraparound to start exact measurement:
+	 * (the current tick might have been already half done)
+	 */
+
+	wait_timer_tick();
+
+	/*
+	 * We wrapped around just now. Let's start:
+	 */
+	if (cpu_has_tsc)
+		rdtscll(t1);
+	tt1 = apic_read(APIC_TMCCT);
+
+	/*
+	 * Let's wait LOOPS wraprounds:
+	 */
+	for (i = 0; i < LOOPS; i++)
+		wait_timer_tick();
+
+	tt2 = apic_read(APIC_TMCCT);
+	if (cpu_has_tsc)
+		rdtscll(t2);
+
+	/*
+	 * The APIC bus clock counter is 32 bits only, it
+	 * might have overflown, but note that we use signed
+	 * longs, thus no extra care needed.
+	 *
+	 * underflown to be exact, as the timer counts down ;)
+	 */
+
+	result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+
+	if (cpu_has_tsc)
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			"%ld.%04ld MHz.\n",
+			((long)(t2-t1)/LOOPS)/(1000000/HZ),
+			((long)(t2-t1)/LOOPS)%(1000000/HZ));
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		"%ld.%04ld MHz.\n",
+		result/(1000000/HZ),
+		result%(1000000/HZ));
+
+	return result;
+}
+
+static unsigned int calibration_result;
+
+void __init setup_boot_APIC_clock(void)
+{
+	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
+	using_apic_timer = 1;
+
+	local_irq_disable();
+
+	calibration_result = calibrate_APIC_clock();
+	/*
+	 * Now set up the timer for real.
+	 */
+	setup_APIC_timer(calibration_result);
+
+	local_irq_enable();
+}
+
+void __init setup_secondary_APIC_clock(void)
+{
+	setup_APIC_timer(calibration_result);
+}
+
+void __init disable_APIC_timer(void)
+{
+	if (using_apic_timer) {
+		unsigned long v;
+
+		v = apic_read(APIC_LVTT);
+		apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+	}
+}
+
+void enable_APIC_timer(void)
+{
+	if (using_apic_timer) {
+		unsigned long v;
+
+		v = apic_read(APIC_LVTT);
+		apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
+	}
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	int i;
+
+	/*
+	 * Sanity check. [at least 500 APIC cycles should be
+	 * between APIC interrupts as a rule of thumb, to avoid
+	 * irqs flooding us]
+	 */
+	if ( (!multiplier) || (calibration_result/multiplier < 500))
+		return -EINVAL;
+
+	/* 
+	 * Set the new multiplier for each CPU. CPUs don't start using the
+	 * new values until the next timer interrupt in which they do process
+	 * accounting. At that time they also adjust their APIC timers
+	 * accordingly.
+	 */
+	for (i = 0; i < NR_CPUS; ++i)
+		per_cpu(prof_multiplier, i) = multiplier;
+
+	return 0;
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * Local timer interrupt handler. It does both profiling and
+ * process statistics/rescheduling.
+ *
+ * We do profiling in every local tick, statistics/rescheduling
+ * happen only every 'profiling multiplier' ticks. The default
+ * multiplier is 1 and it can be changed by writing the new multiplier
+ * value into /proc/profile.
+ */
+
+inline void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+	int cpu = smp_processor_id();
+
+	profile_tick(CPU_PROFILING, regs);
+	if (--per_cpu(prof_counter, cpu) <= 0) {
+		/*
+		 * The multiplier may have changed since the last time we got
+		 * to this point as a result of the user writing to
+		 * /proc/profile. In this case we need to adjust the APIC
+		 * timer accordingly.
+		 *
+		 * Interrupts are already masked off at this point.
+		 */
+		per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
+		if (per_cpu(prof_counter, cpu) !=
+					per_cpu(prof_old_multiplier, cpu)) {
+			__setup_APIC_LVTT(
+					calibration_result/
+					per_cpu(prof_counter, cpu));
+			per_cpu(prof_old_multiplier, cpu) =
+						per_cpu(prof_counter, cpu);
+		}
+
+#ifdef CONFIG_SMP
+		update_process_times(user_mode(regs));
+#endif
+	}
+
+	/*
+	 * We take the 'long' return path, and there every subsystem
+	 * grabs the apropriate locks (kernel lock/ irq lock).
+	 *
+	 * we might want to decouple profiling from the 'long path',
+	 * and do the profiling totally in assembly.
+	 *
+	 * Currently this isn't too much of an issue (performance wise),
+	 * we can take more than 100K local irqs per second on a 100 MHz P5.
+	 */
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+
+fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
+	per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	irq_enter();
+	smp_local_timer_interrupt(regs);
+	irq_exit();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+fastcall void smp_spurious_interrupt(struct pt_regs *regs)
+{
+	unsigned long v;
+
+	irq_enter();
+	/*
+	 * Check if this really is a spurious interrupt and ACK it
+	 * if it is a vectored one.  Just in case...
+	 * Spurious interrupts should not be ACKed.
+	 */
+	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+		ack_APIC_irq();
+
+	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
+	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
+			smp_processor_id());
+	irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+fastcall void smp_error_interrupt(struct pt_regs *regs)
+{
+	unsigned long v, v1;
+
+	irq_enter();
+	/* First tickle the hardware, only then report what went on. -- REW */
+	v = apic_read(APIC_ESR);
+	apic_write(APIC_ESR, 0);
+	v1 = apic_read(APIC_ESR);
+	ack_APIC_irq();
+	atomic_inc(&irq_err_count);
+
+	/* Here is what the APIC error bits mean:
+	   0: Send CS error
+	   1: Receive CS error
+	   2: Send accept error
+	   3: Receive accept error
+	   4: Reserved
+	   5: Send illegal vector
+	   6: Received illegal vector
+	   7: Illegal register address
+	*/
+	printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	        smp_processor_id(), v , v1);
+	irq_exit();
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+	if (enable_local_apic < 0)
+		clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+			boot_cpu_physical_apicid);
+		return -1;
+	}
+
+	verify_local_APIC();
+
+	connect_bsp_APIC();
+
+	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+
+	setup_local_APIC();
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		check_nmi_watchdog();
+#ifdef CONFIG_X86_IO_APIC
+	if (smp_found_config)
+		if (!skip_ioapic_setup && nr_ioapics)
+			setup_IO_APIC();
+#endif
+	setup_boot_APIC_clock();
+
+	return 0;
+}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
new file mode 100644
index 00000000000..45641a87255
--- /dev/null
+++ b/arch/i386/kernel/apm.c
@@ -0,0 +1,2428 @@
+/* -*- linux-c -*-
+ * APM BIOS driver for Linux
+ * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
+ *
+ * Initial development of this driver was funded by NEC Australia P/L
+ *	and NEC Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * October 1995, Rik Faith (faith@cs.unc.edu):
+ *    Minor enhancements and updates (to the patch set) for 1.3.x
+ *    Documentation
+ * January 1996, Rik Faith (faith@cs.unc.edu):
+ *    Make /proc/apm easy to format (bump driver version)
+ * March 1996, Rik Faith (faith@cs.unc.edu):
+ *    Prohibit APM BIOS calls unless apm_enabled.
+ *    (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>)
+ * April 1996, Stephen Rothwell (sfr@canb.auug.org.au)
+ *    Version 1.0 and 1.1
+ * May 1996, Version 1.2
+ * Feb 1998, Version 1.3
+ * Feb 1998, Version 1.4
+ * Aug 1998, Version 1.5
+ * Sep 1998, Version 1.6
+ * Nov 1998, Version 1.7
+ * Jan 1999, Version 1.8
+ * Jan 1999, Version 1.9
+ * Oct 1999, Version 1.10
+ * Nov 1999, Version 1.11
+ * Jan 2000, Version 1.12
+ * Feb 2000, Version 1.13
+ * Nov 2000, Version 1.14
+ * Oct 2001, Version 1.15
+ * Jan 2002, Version 1.16
+ * Oct 2002, Version 1.16ac
+ *
+ * History:
+ *    0.6b: first version in official kernel, Linux 1.3.46
+ *    0.7: changed /proc/apm format, Linux 1.3.58
+ *    0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
+ *    0.9: only call bios if bios is present, Linux 1.3.72
+ *    1.0: use fixed device number, consolidate /proc/apm into this file,
+ *         Linux 1.3.85
+ *    1.1: support user-space standby and suspend, power off after system
+ *         halted, Linux 1.3.98
+ *    1.2: When resetting RTC after resume, take care so that the time
+ *         is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
+ *         <jtoth@princeton.edu>); improve interaction between
+ *         screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
+ *    1.2a:Simple change to stop mysterious bug reports with SMP also added
+ *	   levels to the printk calls. APM is not defined for SMP machines.
+ *         The new replacment for it is, but Linux doesn't yet support this.
+ *         Alan Cox Linux 2.1.55
+ *    1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
+ *    1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
+ *         Dean Gaudet <dgaudet@arctic.org>.
+ *         C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87
+ *    1.5: Fix segment register reloading (in case of bad segments saved
+ *         across BIOS call).
+ *         Stephen Rothwell
+ *    1.6: Cope with complier/assembler differences.
+ *         Only try to turn off the first display device.
+ *         Fix OOPS at power off with no APM BIOS by Jan Echternach
+ *                   <echter@informatik.uni-rostock.de>
+ *         Stephen Rothwell
+ *    1.7: Modify driver's cached copy of the disabled/disengaged flags
+ *         to reflect current state of APM BIOS.
+ *         Chris Rankin <rankinc@bellsouth.net>
+ *         Reset interrupt 0 timer to 100Hz after suspend
+ *         Chad Miller <cmiller@surfsouth.com>
+ *         Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
+ *         Richard Gooch <rgooch@atnf.csiro.au>
+ *         Allow boot time disabling of APM
+ *         Make boot messages far less verbose by default
+ *         Make asm safer
+ *         Stephen Rothwell
+ *    1.8: Add CONFIG_APM_RTC_IS_GMT
+ *         Richard Gooch <rgooch@atnf.csiro.au>
+ *         change APM_NOINTS to CONFIG_APM_ALLOW_INTS
+ *         remove dependency on CONFIG_PROC_FS
+ *         Stephen Rothwell
+ *    1.9: Fix small typo.  <laslo@wodip.opole.pl>
+ *         Try to cope with BIOS's that need to have all display
+ *         devices blanked and not just the first one.
+ *         Ross Paterson <ross@soi.city.ac.uk>
+ *         Fix segment limit setting it has always been wrong as
+ *         the segments needed to have byte granularity.
+ *         Mark a few things __init.
+ *         Add hack to allow power off of SMP systems by popular request.
+ *         Use CONFIG_SMP instead of __SMP__
+ *         Ignore BOUNCES for three seconds.
+ *         Stephen Rothwell
+ *   1.10: Fix for Thinkpad return code.
+ *         Merge 2.2 and 2.3 drivers.
+ *         Remove APM dependencies in arch/i386/kernel/process.c
+ *         Remove APM dependencies in drivers/char/sysrq.c
+ *         Reset time across standby.
+ *         Allow more inititialisation on SMP.
+ *         Remove CONFIG_APM_POWER_OFF and make it boot time
+ *         configurable (default on).
+ *         Make debug only a boot time parameter (remove APM_DEBUG).
+ *         Try to blank all devices on any error.
+ *   1.11: Remove APM dependencies in drivers/char/console.c
+ *         Check nr_running to detect if we are idle (from
+ *         Borislav Deianov <borislav@lix.polytechnique.fr>)
+ *         Fix for bioses that don't zero the top part of the
+ *         entrypoint offset (Mario Sitta <sitta@al.unipmn.it>)
+ *         (reported by Panos Katsaloulis <teras@writeme.com>).
+ *         Real mode power off patch (Walter Hofmann
+ *         <Walter.Hofmann@physik.stud.uni-erlangen.de>).
+ *   1.12: Remove CONFIG_SMP as the compiler will optimize
+ *         the code away anyway (smp_num_cpus == 1 in UP)
+ *         noted by Artur Skawina <skawina@geocities.com>.
+ *         Make power off under SMP work again.
+ *         Fix thinko with initial engaging of BIOS.
+ *         Make sure power off only happens on CPU 0
+ *         (Paul "Rusty" Russell <rusty@rustcorp.com.au>).
+ *         Do error notification to user mode if BIOS calls fail.
+ *         Move entrypoint offset fix to ...boot/setup.S
+ *         where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>).
+ *         Remove smp-power-off. SMP users must now specify
+ *         "apm=power-off" on the kernel command line. Suggested
+ *         by Jim Avera <jima@hal.com>, modified by Alan Cox
+ *         <alan@lxorguk.ukuu.org.uk>.
+ *         Register the /proc/apm entry even on SMP so that
+ *         scripts that check for it before doing power off
+ *         work (Jim Avera <jima@hal.com>).
+ *   1.13: Changes for new pm_ interfaces (Andy Henroid
+ *         <andy_henroid@yahoo.com>).
+ *         Modularize the code.
+ *         Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
+ *         is now the way life works).
+ *         Fix thinko in suspend() (wrong return).
+ *         Notify drivers on critical suspend.
+ *         Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz>
+ *         modified by sfr).
+ *         Disable interrupts while we are suspended (Andy Henroid
+ *         <andy_henroid@yahoo.com> fixed by sfr).
+ *         Make power off work on SMP again (Tony Hoyle
+ *         <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr.
+ *         Remove CONFIG_APM_SUSPEND_BOUNCE.  The bounce ignore
+ *         interval is now configurable.
+ *   1.14: Make connection version persist across module unload/load.
+ *         Enable and engage power management earlier.
+ *         Disengage power management on module unload.
+ *         Changed to use the sysrq-register hack for registering the
+ *         power off function called by magic sysrq based upon discussions
+ *         in irc://irc.openprojects.net/#kernelnewbies
+ *         (Crutcher Dunnavant <crutcher+kernel@datastacks.com>).
+ *         Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
+ *         (Arjan van de Ven <arjanv@redhat.com>) modified by sfr.
+ *         Work around byte swap bug in one of the Vaio's BIOS's
+ *         (Marc Boucher <marc@mbsi.ca>).
+ *         Exposed the disable flag to dmi so that we can handle known
+ *         broken APM (Alan Cox <alan@redhat.com>).
+ *   1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
+ *         calling it - instead idle. (Alan Cox <alan@redhat.com>)
+ *         If an APM idle fails log it and idle sensibly
+ *   1.15: Don't queue events to clients who open the device O_WRONLY.
+ *         Don't expect replies from clients who open the device O_RDONLY.
+ *         (Idea from Thomas Hood)
+ *         Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>)
+ *   1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.)
+ *         Notify listeners of standby or suspend events before notifying
+ *         drivers. Return EBUSY to ioctl() if suspend is rejected.
+ *         (Russell King <rmk@arm.linux.org.uk> and Thomas Hood)
+ *         Ignore first resume after we generate our own resume event
+ *         after a suspend (Thomas Hood)
+ *         Daemonize now gets rid of our controlling terminal (sfr).
+ *         CONFIG_APM_CPU_IDLE now just affects the default value of
+ *         idle_threshold (sfr).
+ *         Change name of kernel apm daemon (as it no longer idles) (sfr).
+ *   1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
+ *	   make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
+ *	   TODO: determine if its "boot CPU" or "CPU0" we want to lock to.
+ *
+ * APM 1.1 Reference:
+ *
+ *   Intel Corporation, Microsoft Corporation. Advanced Power Management
+ *   (APM) BIOS Interface Specification, Revision 1.1, September 1993.
+ *   Intel Order Number 241704-001.  Microsoft Part Number 781-110-X01.
+ *
+ * [This document is available free from Intel by calling 800.628.8686 (fax
+ * 916.356.6100) or 800.548.4725; or via anonymous ftp from
+ * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc.  It is also
+ * available from Microsoft by calling 206.882.8080.]
+ *
+ * APM 1.2 Reference:
+ *   Intel Corporation, Microsoft Corporation. Advanced Power Management
+ *   (APM) BIOS Interface Specification, Revision 1.2, February 1996.
+ *
+ * [This document is available from Microsoft at:
+ *    http://www.microsoft.com/hwdev/busbios/amp_12.htm]
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/poll.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/timer.h>
+#include <linux/fcntl.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/miscdevice.h>
+#include <linux/apm_bios.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/pm.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/dmi.h>
+#include <linux/suspend.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+
+#include "io_ports.h"
+
+extern spinlock_t i8253_lock;
+extern unsigned long get_cmos_time(void);
+extern void machine_real_restart(unsigned char *, int);
+
+#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
+extern int (*console_blank_hook)(int);
+#endif
+
+/*
+ * The apm_bios device is one of the misc char devices.
+ * This is its minor number.
+ */
+#define	APM_MINOR_DEV	134
+
+/*
+ * See Documentation/Config.help for the configuration options.
+ *
+ * Various options can be changed at boot time as follows:
+ * (We allow underscores for compatibility with the modules code)
+ *	apm=on/off			enable/disable APM
+ *	    [no-]allow[-_]ints		allow interrupts during BIOS calls
+ *	    [no-]broken[-_]psr		BIOS has a broken GetPowerStatus call
+ *	    [no-]realmode[-_]power[-_]off	switch to real mode before
+ *	    					powering off
+ *	    [no-]debug			log some debugging messages
+ *	    [no-]power[-_]off		power off on shutdown
+ *	    [no-]smp			Use apm even on an SMP box
+ *	    bounce[-_]interval=<n>	number of ticks to ignore suspend
+ *	    				bounces
+ *          idle[-_]threshold=<n>       System idle percentage above which to
+ *                                      make APM BIOS idle calls. Set it to
+ *                                      100 to disable.
+ *          idle[-_]period=<n>          Period (in 1/100s of a second) over
+ *                                      which the idle percentage is
+ *                                      calculated.
+ */
+
+/* KNOWN PROBLEM MACHINES:
+ *
+ * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
+ *                         [Confirmed by TI representative]
+ * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
+ *                    [Confirmed by BIOS disassembly]
+ *                    [This may work now ...]
+ * P: Toshiba 1950S: battery life information only gets updated after resume
+ * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
+ * 	broken in BIOS [Reported by Garst R. Reese <reese@isn.net>]
+ * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
+ * 	Neale Banks <neale@lowendale.com.au> December 2000
+ *
+ * Legend: U = unusable with APM patches
+ *         P = partially usable with APM patches
+ */
+
+/*
+ * Define as 1 to make the driver always call the APM BIOS busy
+ * routine even if the clock was not reported as slowed by the
+ * idle routine.  Otherwise, define as 0.
+ */
+#define ALWAYS_CALL_BUSY   1
+
+/*
+ * Define to make the APM BIOS calls zero all data segment registers (so
+ * that an incorrect BIOS implementation will cause a kernel panic if it
+ * tries to write to arbitrary memory).
+ */
+#define APM_ZERO_SEGS
+
+#include "apm.h"
+
+/*
+ * Define to make all _set_limit calls use 64k limits.  The APM 1.1 BIOS is
+ * supposed to provide limit information that it recognizes.  Many machines
+ * do this correctly, but many others do not restrict themselves to their
+ * claimed limit.  When this happens, they will cause a segmentation
+ * violation in the kernel at boot time.  Most BIOS's, however, will
+ * respect a 64k limit, so we use that.  If you want to be pedantic and
+ * hold your BIOS to its claims, then undefine this.
+ */
+#define APM_RELAX_SEGMENTS
+
+/*
+ * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
+ * This patched by Chad Miller <cmiller@surfsouth.com>, original code by
+ * David Chen <chen@ctpa04.mit.edu>
+ */
+#undef INIT_TIMER_AFTER_SUSPEND
+
+#ifdef INIT_TIMER_AFTER_SUSPEND
+#include <linux/timex.h>
+#include <asm/io.h>
+#include <linux/delay.h>
+#endif
+
+/*
+ * Need to poll the APM BIOS every second
+ */
+#define APM_CHECK_TIMEOUT	(HZ)
+
+/*
+ * Ignore suspend events for this amount of time after a resume
+ */
+#define DEFAULT_BOUNCE_INTERVAL		(3 * HZ)
+
+/*
+ * Maximum number of events stored
+ */
+#define APM_MAX_EVENTS		20
+
+/*
+ * The per-file APM data
+ */
+struct apm_user {
+	int		magic;
+	struct apm_user *	next;
+	int		suser: 1;
+	int		writer: 1;
+	int		reader: 1;
+	int		suspend_wait: 1;
+	int		suspend_result;
+	int		suspends_pending;
+	int		standbys_pending;
+	int		suspends_read;
+	int		standbys_read;
+	int		event_head;
+	int		event_tail;
+	apm_event_t	events[APM_MAX_EVENTS];
+};
+
+/*
+ * The magic number in apm_user
+ */
+#define APM_BIOS_MAGIC		0x4101
+
+/*
+ * idle percentage above which bios idle calls are done
+ */
+#ifdef CONFIG_APM_CPU_IDLE
+#define DEFAULT_IDLE_THRESHOLD	95
+#else
+#define DEFAULT_IDLE_THRESHOLD	100
+#endif
+#define DEFAULT_IDLE_PERIOD	(100 / 3)
+
+/*
+ * Local variables
+ */
+static struct {
+	unsigned long	offset;
+	unsigned short	segment;
+}				apm_bios_entry;
+static int			clock_slowed;
+static int			idle_threshold = DEFAULT_IDLE_THRESHOLD;
+static int			idle_period = DEFAULT_IDLE_PERIOD;
+static int			set_pm_idle;
+static int			suspends_pending;
+static int			standbys_pending;
+static int			ignore_sys_suspend;
+static int			ignore_normal_resume;
+static int			bounce_interval = DEFAULT_BOUNCE_INTERVAL;
+
+#ifdef CONFIG_APM_RTC_IS_GMT
+#	define	clock_cmos_diff	0
+#	define	got_clock_diff	1
+#else
+static long			clock_cmos_diff;
+static int			got_clock_diff;
+#endif
+static int			debug;
+static int			smp;
+static int			apm_disabled = -1;
+#ifdef CONFIG_SMP
+static int			power_off;
+#else
+static int			power_off = 1;
+#endif
+#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
+static int			realmode_power_off = 1;
+#else
+static int			realmode_power_off;
+#endif
+static int			exit_kapmd;
+static int			kapmd_running;
+#ifdef CONFIG_APM_ALLOW_INTS
+static int			allow_ints = 1;
+#else
+static int			allow_ints;
+#endif
+static int			broken_psr;
+
+static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
+static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
+static struct apm_user *	user_list;
+static DEFINE_SPINLOCK(user_list_lock);
+static struct desc_struct	bad_bios_desc = { 0, 0x00409200 };
+
+static char			driver_version[] = "1.16ac";	/* no spaces */
+
+/*
+ *	APM event names taken from the APM 1.2 specification. These are
+ *	the message codes that the BIOS uses to tell us about events
+ */
+static char *	apm_event_name[] = {
+	"system standby",
+	"system suspend",
+	"normal resume",
+	"critical resume",
+	"low battery",
+	"power status change",
+	"update time",
+	"critical suspend",
+	"user standby",
+	"user suspend",
+	"system standby resume",
+	"capabilities change"
+};
+#define NR_APM_EVENT_NAME	\
+		(sizeof(apm_event_name) / sizeof(apm_event_name[0]))
+
+typedef struct lookup_t {
+	int	key;
+	char *	msg;
+} lookup_t;
+
+/*
+ *	The BIOS returns a set of standard error codes in AX when the
+ *	carry flag is set.
+ */
+ 
+static const lookup_t error_table[] = {
+/* N/A	{ APM_SUCCESS,		"Operation succeeded" }, */
+	{ APM_DISABLED,		"Power management disabled" },
+	{ APM_CONNECTED,	"Real mode interface already connected" },
+	{ APM_NOT_CONNECTED,	"Interface not connected" },
+	{ APM_16_CONNECTED,	"16 bit interface already connected" },
+/* N/A	{ APM_16_UNSUPPORTED,	"16 bit interface not supported" }, */
+	{ APM_32_CONNECTED,	"32 bit interface already connected" },
+	{ APM_32_UNSUPPORTED,	"32 bit interface not supported" },
+	{ APM_BAD_DEVICE,	"Unrecognized device ID" },
+	{ APM_BAD_PARAM,	"Parameter out of range" },
+	{ APM_NOT_ENGAGED,	"Interface not engaged" },
+	{ APM_BAD_FUNCTION,     "Function not supported" },
+	{ APM_RESUME_DISABLED,	"Resume timer disabled" },
+	{ APM_BAD_STATE,	"Unable to enter requested state" },
+/* N/A	{ APM_NO_EVENTS,	"No events pending" }, */
+	{ APM_NO_ERROR,		"BIOS did not set a return code" },
+	{ APM_NOT_PRESENT,	"No APM present" }
+};
+#define ERROR_COUNT	(sizeof(error_table)/sizeof(lookup_t))
+
+/**
+ *	apm_error	-	display an APM error
+ *	@str: information string
+ *	@err: APM BIOS return code
+ *
+ *	Write a meaningful log entry to the kernel log in the event of
+ *	an APM error.
+ */
+ 
+static void apm_error(char *str, int err)
+{
+	int	i;
+
+	for (i = 0; i < ERROR_COUNT; i++)
+		if (error_table[i].key == err) break;
+	if (i < ERROR_COUNT)
+		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
+	else
+		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
+			str, err);
+}
+
+/*
+ * Lock APM functionality to physical CPU 0
+ */
+ 
+#ifdef CONFIG_SMP
+
+static cpumask_t apm_save_cpus(void)
+{
+	cpumask_t x = current->cpus_allowed;
+	/* Some bioses don't like being called from CPU != 0 */
+	set_cpus_allowed(current, cpumask_of_cpu(0));
+	BUG_ON(smp_processor_id() != 0);
+	return x;
+}
+
+static inline void apm_restore_cpus(cpumask_t mask)
+{
+	set_cpus_allowed(current, mask);
+}
+
+#else
+
+/*
+ *	No CPU lockdown needed on a uniprocessor
+ */
+ 
+#define apm_save_cpus()		(current->cpus_allowed)
+#define apm_restore_cpus(x)	(void)(x)
+
+#endif
+
+/*
+ * These are the actual BIOS calls.  Depending on APM_ZERO_SEGS and
+ * apm_info.allow_ints, we are being really paranoid here!  Not only
+ * are interrupts disabled, but all the segment registers (except SS)
+ * are saved and zeroed this means that if the BIOS tries to reference
+ * any data without explicitly loading the segment registers, the kernel
+ * will fault immediately rather than have some unforeseen circumstances
+ * for the rest of the kernel.  And it will be very obvious!  :-) Doing
+ * this depends on CS referring to the same physical memory as DS so that
+ * DS can be zeroed before the call. Unfortunately, we can't do anything
+ * about the stack segment/pointer.  Also, we tell the compiler that
+ * everything could change.
+ *
+ * Also, we KNOW that for the non error case of apm_bios_call, there
+ * is no useful data returned in the low order 8 bits of eax.
+ */
+#define APM_DO_CLI	\
+	if (apm_info.allow_ints) \
+		local_irq_enable(); \
+	else \
+		local_irq_disable();
+
+#ifdef APM_ZERO_SEGS
+#	define APM_DECL_SEGS \
+		unsigned int saved_fs; unsigned int saved_gs;
+#	define APM_DO_SAVE_SEGS \
+		savesegment(fs, saved_fs); savesegment(gs, saved_gs)
+#	define APM_DO_RESTORE_SEGS \
+		loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
+#else
+#	define APM_DECL_SEGS
+#	define APM_DO_SAVE_SEGS
+#	define APM_DO_RESTORE_SEGS
+#endif
+
+/**
+ *	apm_bios_call	-	Make an APM BIOS 32bit call
+ *	@func: APM function to execute
+ *	@ebx_in: EBX register for call entry
+ *	@ecx_in: ECX register for call entry
+ *	@eax: EAX register return
+ *	@ebx: EBX register return
+ *	@ecx: ECX register return
+ *	@edx: EDX register return
+ *	@esi: ESI register return
+ *
+ *	Make an APM call using the 32bit protected mode interface. The
+ *	caller is responsible for knowing if APM BIOS is configured and
+ *	enabled. This call can disable interrupts for a long period of
+ *	time on some laptops.  The return value is in AH and the carry
+ *	flag is loaded into AL.  If there is an error, then the error
+ *	code is returned in AH (bits 8-15 of eax) and this function
+ *	returns non-zero.
+ */
+ 
+static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
+	u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
+{
+	APM_DECL_SEGS
+	unsigned long		flags;
+	cpumask_t		cpus;
+	int			cpu;
+	struct desc_struct	save_desc_40;
+
+	cpus = apm_save_cpus();
+	
+	cpu = get_cpu();
+	save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
+	per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
+
+	local_save_flags(flags);
+	APM_DO_CLI;
+	APM_DO_SAVE_SEGS;
+	apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
+	APM_DO_RESTORE_SEGS;
+	local_irq_restore(flags);
+	per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40;
+	put_cpu();
+	apm_restore_cpus(cpus);
+	
+	return *eax & 0xff;
+}
+
+/**
+ *	apm_bios_call_simple	-	make a simple APM BIOS 32bit call
+ *	@func: APM function to invoke
+ *	@ebx_in: EBX register value for BIOS call
+ *	@ecx_in: ECX register value for BIOS call
+ *	@eax: EAX register on return from the BIOS call
+ *
+ *	Make a BIOS call that does only returns one value, or just status.
+ *	If there is an error, then the error code is returned in AH
+ *	(bits 8-15 of eax) and this function returns non-zero. This is
+ *	used for simpler BIOS operations. This call may hold interrupts
+ *	off for a long time on some laptops.
+ */
+
+static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
+{
+	u8			error;
+	APM_DECL_SEGS
+	unsigned long		flags;
+	cpumask_t		cpus;
+	int			cpu;
+	struct desc_struct	save_desc_40;
+
+
+	cpus = apm_save_cpus();
+	
+	cpu = get_cpu();
+	save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
+	per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
+
+	local_save_flags(flags);
+	APM_DO_CLI;
+	APM_DO_SAVE_SEGS;
+	error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
+	APM_DO_RESTORE_SEGS;
+	local_irq_restore(flags);
+	__get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40;
+	put_cpu();
+	apm_restore_cpus(cpus);
+	return error;
+}
+
+/**
+ *	apm_driver_version	-	APM driver version
+ *	@val:	loaded with the APM version on return
+ *
+ *	Retrieve the APM version supported by the BIOS. This is only
+ *	supported for APM 1.1 or higher. An error indicates APM 1.0 is
+ *	probably present.
+ *
+ *	On entry val should point to a value indicating the APM driver
+ *	version with the high byte being the major and the low byte the
+ *	minor number both in BCD
+ *
+ *	On return it will hold the BIOS revision supported in the
+ *	same format.
+ */
+
+static int apm_driver_version(u_short *val)
+{
+	u32	eax;
+
+	if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
+		return (eax >> 8) & 0xff;
+	*val = eax;
+	return APM_SUCCESS;
+}
+
+/**
+ *	apm_get_event	-	get an APM event from the BIOS
+ *	@event: pointer to the event
+ *	@info: point to the event information
+ *
+ *	The APM BIOS provides a polled information for event
+ *	reporting. The BIOS expects to be polled at least every second
+ *	when events are pending. When a message is found the caller should
+ *	poll until no more messages are present.  However, this causes
+ *	problems on some laptops where a suspend event notification is
+ *	not cleared until it is acknowledged.
+ *
+ *	Additional information is returned in the info pointer, providing
+ *	that APM 1.2 is in use. If no messges are pending the value 0x80
+ *	is returned (No power management events pending).
+ */
+ 
+static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
+{
+	u32	eax;
+	u32	ebx;
+	u32	ecx;
+	u32	dummy;
+
+	if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
+			&dummy, &dummy))
+		return (eax >> 8) & 0xff;
+	*event = ebx;
+	if (apm_info.connection_version < 0x0102)
+		*info = ~0; /* indicate info not valid */
+	else
+		*info = ecx;
+	return APM_SUCCESS;
+}
+
+/**
+ *	set_power_state	-	set the power management state
+ *	@what: which items to transition
+ *	@state: state to transition to
+ *
+ *	Request an APM change of state for one or more system devices. The
+ *	processor state must be transitioned last of all. what holds the
+ *	class of device in the upper byte and the device number (0xFF for
+ *	all) for the object to be transitioned.
+ *
+ *	The state holds the state to transition to, which may in fact
+ *	be an acceptance of a BIOS requested state change.
+ */
+ 
+static int set_power_state(u_short what, u_short state)
+{
+	u32	eax;
+
+	if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
+		return (eax >> 8) & 0xff;
+	return APM_SUCCESS;
+}
+
+/**
+ *	set_system_power_state - set system wide power state
+ *	@state: which state to enter
+ *
+ *	Transition the entire system into a new APM power state.
+ */
+ 
+static int set_system_power_state(u_short state)
+{
+	return set_power_state(APM_DEVICE_ALL, state);
+}
+
+/**
+ *	apm_do_idle	-	perform power saving
+ *
+ *	This function notifies the BIOS that the processor is (in the view
+ *	of the OS) idle. It returns -1 in the event that the BIOS refuses
+ *	to handle the idle request. On a success the function returns 1
+ *	if the BIOS did clock slowing or 0 otherwise.
+ */
+ 
+static int apm_do_idle(void)
+{
+	u32	eax;
+
+	if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
+		static unsigned long t;
+
+		/* This always fails on some SMP boards running UP kernels.
+		 * Only report the failure the first 5 times.
+		 */
+		if (++t < 5)
+		{
+			printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
+					(eax >> 8) & 0xff);
+			t = jiffies;
+		}
+		return -1;
+	}
+	clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
+	return clock_slowed;
+}
+
+/**
+ *	apm_do_busy	-	inform the BIOS the CPU is busy
+ *
+ *	Request that the BIOS brings the CPU back to full performance. 
+ */
+ 
+static void apm_do_busy(void)
+{
+	u32	dummy;
+
+	if (clock_slowed || ALWAYS_CALL_BUSY) {
+		(void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
+		clock_slowed = 0;
+	}
+}
+
+/*
+ * If no process has really been interested in
+ * the CPU for some time, we want to call BIOS
+ * power management - we probably want
+ * to conserve power.
+ */
+#define IDLE_CALC_LIMIT   (HZ * 100)
+#define IDLE_LEAKY_MAX    16
+
+static void (*original_pm_idle)(void);
+
+extern void default_idle(void);
+
+/**
+ * apm_cpu_idle		-	cpu idling for APM capable Linux
+ *
+ * This is the idling function the kernel executes when APM is available. It 
+ * tries to do BIOS powermanagement based on the average system idle time.
+ * Furthermore it calls the system default idle routine.
+ */
+
+static void apm_cpu_idle(void)
+{
+	static int use_apm_idle; /* = 0 */
+	static unsigned int last_jiffies; /* = 0 */
+	static unsigned int last_stime; /* = 0 */
+
+	int apm_idle_done = 0;
+	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
+	unsigned int bucket;
+
+recalc:
+	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
+		use_apm_idle = 0;
+		last_jiffies = jiffies;
+		last_stime = current->stime;
+	} else if (jiffies_since_last_check > idle_period) {
+		unsigned int idle_percentage;
+
+		idle_percentage = current->stime - last_stime;
+		idle_percentage *= 100;
+		idle_percentage /= jiffies_since_last_check;
+		use_apm_idle = (idle_percentage > idle_threshold);
+		if (apm_info.forbid_idle)
+			use_apm_idle = 0;
+		last_jiffies = jiffies;
+		last_stime = current->stime;
+	}
+
+	bucket = IDLE_LEAKY_MAX;
+
+	while (!need_resched()) {
+		if (use_apm_idle) {
+			unsigned int t;
+
+			t = jiffies;
+			switch (apm_do_idle()) {
+			case 0: apm_idle_done = 1;
+				if (t != jiffies) {
+					if (bucket) {
+						bucket = IDLE_LEAKY_MAX;
+						continue;
+					}
+				} else if (bucket) {
+					bucket--;
+					continue;
+				}
+				break;
+			case 1: apm_idle_done = 1;
+				break;
+			default: /* BIOS refused */
+				break;
+			}
+		}
+		if (original_pm_idle)
+			original_pm_idle();
+		else
+			default_idle();
+		jiffies_since_last_check = jiffies - last_jiffies;
+		if (jiffies_since_last_check > idle_period)
+			goto recalc;
+	}
+
+	if (apm_idle_done)
+		apm_do_busy();
+}
+
+/**
+ *	apm_power_off	-	ask the BIOS to power off
+ *
+ *	Handle the power off sequence. This is the one piece of code we
+ *	will execute even on SMP machines. In order to deal with BIOS
+ *	bugs we support real mode APM BIOS power off calls. We also make
+ *	the SMP call on CPU0 as some systems will only honour this call
+ *	on their first cpu.
+ */
+ 
+static void apm_power_off(void)
+{
+	unsigned char	po_bios_call[] = {
+		0xb8, 0x00, 0x10,	/* movw  $0x1000,ax  */
+		0x8e, 0xd0,		/* movw  ax,ss       */
+		0xbc, 0x00, 0xf0,	/* movw  $0xf000,sp  */
+		0xb8, 0x07, 0x53,	/* movw  $0x5307,ax  */
+		0xbb, 0x01, 0x00,	/* movw  $0x0001,bx  */
+		0xb9, 0x03, 0x00,	/* movw  $0x0003,cx  */
+		0xcd, 0x15		/* int   $0x15       */
+	};
+
+	/*
+	 * This may be called on an SMP machine.
+	 */
+#ifdef CONFIG_SMP
+	/* Some bioses don't like being called from CPU != 0 */
+	set_cpus_allowed(current, cpumask_of_cpu(0));
+	BUG_ON(smp_processor_id() != 0);
+#endif
+	if (apm_info.realmode_power_off)
+	{
+		(void)apm_save_cpus();
+		machine_real_restart(po_bios_call, sizeof(po_bios_call));
+	}
+	else
+		(void) set_system_power_state(APM_STATE_OFF);
+}
+
+#ifdef CONFIG_APM_DO_ENABLE
+
+/**
+ *	apm_enable_power_management - enable BIOS APM power management
+ *	@enable: enable yes/no
+ *
+ *	Enable or disable the APM BIOS power services. 
+ */
+ 
+static int apm_enable_power_management(int enable)
+{
+	u32	eax;
+
+	if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
+		return APM_NOT_ENGAGED;
+	if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
+			enable, &eax))
+		return (eax >> 8) & 0xff;
+	if (enable)
+		apm_info.bios.flags &= ~APM_BIOS_DISABLED;
+	else
+		apm_info.bios.flags |= APM_BIOS_DISABLED;
+	return APM_SUCCESS;
+}
+#endif
+
+/**
+ *	apm_get_power_status	-	get current power state
+ *	@status: returned status
+ *	@bat: battery info
+ *	@life: estimated life
+ *
+ *	Obtain the current power status from the APM BIOS. We return a
+ *	status which gives the rough battery status, and current power
+ *	source. The bat value returned give an estimate as a percentage
+ *	of life and a status value for the battery. The estimated life
+ *	if reported is a lifetime in secodnds/minutes at current powwer
+ *	consumption.
+ */
+ 
+static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
+{
+	u32	eax;
+	u32	ebx;
+	u32	ecx;
+	u32	edx;
+	u32	dummy;
+
+	if (apm_info.get_power_status_broken)
+		return APM_32_UNSUPPORTED;
+	if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
+			&eax, &ebx, &ecx, &edx, &dummy))
+		return (eax >> 8) & 0xff;
+	*status = ebx;
+	*bat = ecx;
+	if (apm_info.get_power_status_swabinminutes) {
+		*life = swab16((u16)edx);
+		*life |= 0x8000;
+	} else
+		*life = edx;
+	return APM_SUCCESS;
+}
+
+#if 0
+static int apm_get_battery_status(u_short which, u_short *status,
+				  u_short *bat, u_short *life, u_short *nbat)
+{
+	u32	eax;
+	u32	ebx;
+	u32	ecx;
+	u32	edx;
+	u32	esi;
+
+	if (apm_info.connection_version < 0x0102) {
+		/* pretend we only have one battery. */
+		if (which != 1)
+			return APM_BAD_DEVICE;
+		*nbat = 1;
+		return apm_get_power_status(status, bat, life);
+	}
+
+	if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
+			&ebx, &ecx, &edx, &esi))
+		return (eax >> 8) & 0xff;
+	*status = ebx;
+	*bat = ecx;
+	*life = edx;
+	*nbat = esi;
+	return APM_SUCCESS;
+}
+#endif
+
+/**
+ *	apm_engage_power_management	-	enable PM on a device
+ *	@device: identity of device
+ *	@enable: on/off
+ *
+ *	Activate or deactive power management on either a specific device
+ *	or the entire system (%APM_DEVICE_ALL).
+ */
+ 
+static int apm_engage_power_management(u_short device, int enable)
+{
+	u32	eax;
+
+	if ((enable == 0) && (device == APM_DEVICE_ALL)
+	    && (apm_info.bios.flags & APM_BIOS_DISABLED))
+		return APM_DISABLED;
+	if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
+		return (eax >> 8) & 0xff;
+	if (device == APM_DEVICE_ALL) {
+		if (enable)
+			apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
+		else
+			apm_info.bios.flags |= APM_BIOS_DISENGAGED;
+	}
+	return APM_SUCCESS;
+}
+
+#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
+
+/**
+ *	apm_console_blank	-	blank the display
+ *	@blank: on/off
+ *
+ *	Attempt to blank the console, firstly by blanking just video device
+ *	zero, and if that fails (some BIOSes don't support it) then it blanks
+ *	all video devices. Typically the BIOS will do laptop backlight and
+ *	monitor powerdown for us.
+ */
+ 
+static int apm_console_blank(int blank)
+{
+	int	error;
+	u_short	state;
+
+	state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
+	/* Blank the first display device */
+	error = set_power_state(0x100, state);
+	if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
+		/* try to blank them all instead */
+		error = set_power_state(0x1ff, state);
+		if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
+			/* try to blank device one instead */
+			error = set_power_state(0x101, state);
+	}
+	if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
+		return 1;
+	if (error == APM_NOT_ENGAGED) {
+		static int tried;
+		int eng_error;
+		if (tried++ == 0) {
+			eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
+			if (eng_error) {
+				apm_error("set display", error);
+				apm_error("engage interface", eng_error);
+				return 0;
+			} else
+				return apm_console_blank(blank);
+		}
+	}
+	apm_error("set display", error);
+	return 0;
+}
+#endif
+
+static int queue_empty(struct apm_user *as)
+{
+	return as->event_head == as->event_tail;
+}
+
+static apm_event_t get_queued_event(struct apm_user *as)
+{
+	as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+	return as->events[as->event_tail];
+}
+
+static void queue_event(apm_event_t event, struct apm_user *sender)
+{
+	struct apm_user *	as;
+
+	spin_lock(&user_list_lock);
+	if (user_list == NULL)
+		goto out;
+	for (as = user_list; as != NULL; as = as->next) {
+		if ((as == sender) || (!as->reader))
+			continue;
+		as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
+		if (as->event_head == as->event_tail) {
+			static int notified;
+
+			if (notified++ == 0)
+			    printk(KERN_ERR "apm: an event queue overflowed\n");
+			as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+		}
+		as->events[as->event_head] = event;
+		if ((!as->suser) || (!as->writer))
+			continue;
+		switch (event) {
+		case APM_SYS_SUSPEND:
+		case APM_USER_SUSPEND:
+			as->suspends_pending++;
+			suspends_pending++;
+			break;
+
+		case APM_SYS_STANDBY:
+		case APM_USER_STANDBY:
+			as->standbys_pending++;
+			standbys_pending++;
+			break;
+		}
+	}
+	wake_up_interruptible(&apm_waitqueue);
+out:
+	spin_unlock(&user_list_lock);
+}
+
+static void set_time(void)
+{
+	if (got_clock_diff) {	/* Must know time zone in order to set clock */
+		xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
+		xtime.tv_nsec = 0; 
+	} 
+}
+
+static void get_time_diff(void)
+{
+#ifndef CONFIG_APM_RTC_IS_GMT
+	/*
+	 * Estimate time zone so that set_time can update the clock
+	 */
+	clock_cmos_diff = -get_cmos_time();
+	clock_cmos_diff += get_seconds();
+	got_clock_diff = 1;
+#endif
+}
+
+static void reinit_timer(void)
+{
+#ifdef INIT_TIMER_AFTER_SUSPEND
+	unsigned long	flags;
+	extern spinlock_t i8253_lock;
+
+	spin_lock_irqsave(&i8253_lock, flags);
+	/* set the clock to 100 Hz */
+	outb_p(0x34, PIT_MODE);		/* binary, mode 2, LSB/MSB, ch 0 */
+	udelay(10);
+	outb_p(LATCH & 0xff, PIT_CH0);	/* LSB */
+	udelay(10);
+	outb(LATCH >> 8, PIT_CH0);	/* MSB */
+	udelay(10);
+	spin_unlock_irqrestore(&i8253_lock, flags);
+#endif
+}
+
+static int suspend(int vetoable)
+{
+	int		err;
+	struct apm_user	*as;
+
+	if (pm_send_all(PM_SUSPEND, (void *)3)) {
+		/* Vetoed */
+		if (vetoable) {
+			if (apm_info.connection_version > 0x100)
+				set_system_power_state(APM_STATE_REJECT);
+			err = -EBUSY;
+			ignore_sys_suspend = 0;
+			printk(KERN_WARNING "apm: suspend was vetoed.\n");
+			goto out;
+		}
+		printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
+	}
+
+	device_suspend(PMSG_SUSPEND);
+	local_irq_disable();
+	device_power_down(PMSG_SUSPEND);
+
+	/* serialize with the timer interrupt */
+	write_seqlock(&xtime_lock);
+
+	/* protect against access to timer chip registers */
+	spin_lock(&i8253_lock);
+
+	get_time_diff();
+	/*
+	 * Irq spinlock must be dropped around set_system_power_state.
+	 * We'll undo any timer changes due to interrupts below.
+	 */
+	spin_unlock(&i8253_lock);
+	write_sequnlock(&xtime_lock);
+	local_irq_enable();
+
+	save_processor_state();
+	err = set_system_power_state(APM_STATE_SUSPEND);
+	restore_processor_state();
+
+	local_irq_disable();
+	write_seqlock(&xtime_lock);
+	spin_lock(&i8253_lock);
+	reinit_timer();
+	set_time();
+	ignore_normal_resume = 1;
+
+	spin_unlock(&i8253_lock);
+	write_sequnlock(&xtime_lock);
+
+	if (err == APM_NO_ERROR)
+		err = APM_SUCCESS;
+	if (err != APM_SUCCESS)
+		apm_error("suspend", err);
+	err = (err == APM_SUCCESS) ? 0 : -EIO;
+	device_power_up();
+	local_irq_enable();
+	device_resume();
+	pm_send_all(PM_RESUME, (void *)0);
+	queue_event(APM_NORMAL_RESUME, NULL);
+ out:
+	spin_lock(&user_list_lock);
+	for (as = user_list; as != NULL; as = as->next) {
+		as->suspend_wait = 0;
+		as->suspend_result = err;
+	}
+	spin_unlock(&user_list_lock);
+	wake_up_interruptible(&apm_suspend_waitqueue);
+	return err;
+}
+
+static void standby(void)
+{
+	int	err;
+
+	local_irq_disable();
+	device_power_down(PMSG_SUSPEND);
+	/* serialize with the timer interrupt */
+	write_seqlock(&xtime_lock);
+	/* If needed, notify drivers here */
+	get_time_diff();
+	write_sequnlock(&xtime_lock);
+	local_irq_enable();
+
+	err = set_system_power_state(APM_STATE_STANDBY);
+	if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
+		apm_error("standby", err);
+
+	local_irq_disable();
+	device_power_up();
+	local_irq_enable();
+}
+
+static apm_event_t get_event(void)
+{
+	int		error;
+	apm_event_t	event;
+	apm_eventinfo_t	info;
+
+	static int notified;
+
+	/* we don't use the eventinfo */
+	error = apm_get_event(&event, &info);
+	if (error == APM_SUCCESS)
+		return event;
+
+	if ((error != APM_NO_EVENTS) && (notified++ == 0))
+		apm_error("get_event", error);
+
+	return 0;
+}
+
+static void check_events(void)
+{
+	apm_event_t		event;
+	static unsigned long	last_resume;
+	static int		ignore_bounce;
+
+	while ((event = get_event()) != 0) {
+		if (debug) {
+			if (event <= NR_APM_EVENT_NAME)
+				printk(KERN_DEBUG "apm: received %s notify\n",
+				       apm_event_name[event - 1]);
+			else
+				printk(KERN_DEBUG "apm: received unknown "
+				       "event 0x%02x\n", event);
+		}
+		if (ignore_bounce
+		    && ((jiffies - last_resume) > bounce_interval))
+			ignore_bounce = 0;
+
+		switch (event) {
+		case APM_SYS_STANDBY:
+		case APM_USER_STANDBY:
+			queue_event(event, NULL);
+			if (standbys_pending <= 0)
+				standby();
+			break;
+
+		case APM_USER_SUSPEND:
+#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
+			if (apm_info.connection_version > 0x100)
+				set_system_power_state(APM_STATE_REJECT);
+			break;
+#endif
+		case APM_SYS_SUSPEND:
+			if (ignore_bounce) {
+				if (apm_info.connection_version > 0x100)
+					set_system_power_state(APM_STATE_REJECT);
+				break;
+			}
+			/*
+			 * If we are already processing a SUSPEND,
+			 * then further SUSPEND events from the BIOS
+			 * will be ignored.  We also return here to
+			 * cope with the fact that the Thinkpads keep
+			 * sending a SUSPEND event until something else
+			 * happens!
+			 */
+			if (ignore_sys_suspend)
+				return;
+			ignore_sys_suspend = 1;
+			queue_event(event, NULL);
+			if (suspends_pending <= 0)
+				(void) suspend(1);
+			break;
+
+		case APM_NORMAL_RESUME:
+		case APM_CRITICAL_RESUME:
+		case APM_STANDBY_RESUME:
+			ignore_sys_suspend = 0;
+			last_resume = jiffies;
+			ignore_bounce = 1;
+			if ((event != APM_NORMAL_RESUME)
+			    || (ignore_normal_resume == 0)) {
+				write_seqlock_irq(&xtime_lock);
+				set_time();
+				write_sequnlock_irq(&xtime_lock);
+				device_resume();
+				pm_send_all(PM_RESUME, (void *)0);
+				queue_event(event, NULL);
+			}
+			ignore_normal_resume = 0;
+			break;
+
+		case APM_CAPABILITY_CHANGE:
+		case APM_LOW_BATTERY:
+		case APM_POWER_STATUS_CHANGE:
+			queue_event(event, NULL);
+			/* If needed, notify drivers here */
+			break;
+
+		case APM_UPDATE_TIME:
+			write_seqlock_irq(&xtime_lock);
+			set_time();
+			write_sequnlock_irq(&xtime_lock);
+			break;
+
+		case APM_CRITICAL_SUSPEND:
+			/*
+			 * We are not allowed to reject a critical suspend.
+			 */
+			(void) suspend(0);
+			break;
+		}
+	}
+}
+
+static void apm_event_handler(void)
+{
+	static int	pending_count = 4;
+	int		err;
+
+	if ((standbys_pending > 0) || (suspends_pending > 0)) {
+		if ((apm_info.connection_version > 0x100) &&
+				(pending_count-- <= 0)) {
+			pending_count = 4;
+			if (debug)
+				printk(KERN_DEBUG "apm: setting state busy\n");
+			err = set_system_power_state(APM_STATE_BUSY);
+			if (err)
+				apm_error("busy", err);
+		}
+	} else
+		pending_count = 4;
+	check_events();
+}
+
+/*
+ * This is the APM thread main loop.
+ */
+
+static void apm_mainloop(void)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(&apm_waitqueue, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+	for (;;) {
+		schedule_timeout(APM_CHECK_TIMEOUT);
+		if (exit_kapmd)
+			break;
+		/*
+		 * Ok, check all events, check for idle (and mark us sleeping
+		 * so as not to count towards the load average)..
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		apm_event_handler();
+	}
+	remove_wait_queue(&apm_waitqueue, &wait);
+}
+
+static int check_apm_user(struct apm_user *as, const char *func)
+{
+	if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
+		printk(KERN_ERR "apm: %s passed bad filp\n", func);
+		return 1;
+	}
+	return 0;
+}
+
+static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct apm_user *	as;
+	int			i;
+	apm_event_t		event;
+
+	as = fp->private_data;
+	if (check_apm_user(as, "read"))
+		return -EIO;
+	if ((int)count < sizeof(apm_event_t))
+		return -EINVAL;
+	if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+	wait_event_interruptible(apm_waitqueue, !queue_empty(as));
+	i = count;
+	while ((i >= sizeof(event)) && !queue_empty(as)) {
+		event = get_queued_event(as);
+		if (copy_to_user(buf, &event, sizeof(event))) {
+			if (i < count)
+				break;
+			return -EFAULT;
+		}
+		switch (event) {
+		case APM_SYS_SUSPEND:
+		case APM_USER_SUSPEND:
+			as->suspends_read++;
+			break;
+
+		case APM_SYS_STANDBY:
+		case APM_USER_STANDBY:
+			as->standbys_read++;
+			break;
+		}
+		buf += sizeof(event);
+		i -= sizeof(event);
+	}
+	if (i < count)
+		return count - i;
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	return 0;
+}
+
+static unsigned int do_poll(struct file *fp, poll_table * wait)
+{
+	struct apm_user * as;
+
+	as = fp->private_data;
+	if (check_apm_user(as, "poll"))
+		return 0;
+	poll_wait(fp, &apm_waitqueue, wait);
+	if (!queue_empty(as))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
+static int do_ioctl(struct inode * inode, struct file *filp,
+		    u_int cmd, u_long arg)
+{
+	struct apm_user *	as;
+
+	as = filp->private_data;
+	if (check_apm_user(as, "ioctl"))
+		return -EIO;
+	if ((!as->suser) || (!as->writer))
+		return -EPERM;
+	switch (cmd) {
+	case APM_IOC_STANDBY:
+		if (as->standbys_read > 0) {
+			as->standbys_read--;
+			as->standbys_pending--;
+			standbys_pending--;
+		} else
+			queue_event(APM_USER_STANDBY, as);
+		if (standbys_pending <= 0)
+			standby();
+		break;
+	case APM_IOC_SUSPEND:
+		if (as->suspends_read > 0) {
+			as->suspends_read--;
+			as->suspends_pending--;
+			suspends_pending--;
+		} else
+			queue_event(APM_USER_SUSPEND, as);
+		if (suspends_pending <= 0) {
+			return suspend(1);
+		} else {
+			as->suspend_wait = 1;
+			wait_event_interruptible(apm_suspend_waitqueue,
+					as->suspend_wait == 0);
+			return as->suspend_result;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int do_release(struct inode * inode, struct file * filp)
+{
+	struct apm_user *	as;
+
+	as = filp->private_data;
+	if (check_apm_user(as, "release"))
+		return 0;
+	filp->private_data = NULL;
+	if (as->standbys_pending > 0) {
+		standbys_pending -= as->standbys_pending;
+		if (standbys_pending <= 0)
+			standby();
+	}
+	if (as->suspends_pending > 0) {
+		suspends_pending -= as->suspends_pending;
+		if (suspends_pending <= 0)
+			(void) suspend(1);
+	}
+  	spin_lock(&user_list_lock);
+	if (user_list == as)
+		user_list = as->next;
+	else {
+		struct apm_user *	as1;
+
+		for (as1 = user_list;
+		     (as1 != NULL) && (as1->next != as);
+		     as1 = as1->next)
+			;
+		if (as1 == NULL)
+			printk(KERN_ERR "apm: filp not in user list\n");
+		else
+			as1->next = as->next;
+	}
+	spin_unlock(&user_list_lock);
+	kfree(as);
+	return 0;
+}
+
+static int do_open(struct inode * inode, struct file * filp)
+{
+	struct apm_user *	as;
+
+	as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
+	if (as == NULL) {
+		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
+		       sizeof(*as));
+		return -ENOMEM;
+	}
+	as->magic = APM_BIOS_MAGIC;
+	as->event_tail = as->event_head = 0;
+	as->suspends_pending = as->standbys_pending = 0;
+	as->suspends_read = as->standbys_read = 0;
+	/*
+	 * XXX - this is a tiny bit broken, when we consider BSD
+         * process accounting. If the device is opened by root, we
+	 * instantly flag that we used superuser privs. Who knows,
+	 * we might close the device immediately without doing a
+	 * privileged operation -- cevans
+	 */
+	as->suser = capable(CAP_SYS_ADMIN);
+	as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
+	as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
+	spin_lock(&user_list_lock);
+	as->next = user_list;
+	user_list = as;
+	spin_unlock(&user_list_lock);
+	filp->private_data = as;
+	return 0;
+}
+
+static int apm_get_info(char *buf, char **start, off_t fpos, int length)
+{
+	char *		p;
+	unsigned short	bx;
+	unsigned short	cx;
+	unsigned short	dx;
+	int		error;
+	unsigned short  ac_line_status = 0xff;
+	unsigned short  battery_status = 0xff;
+	unsigned short  battery_flag   = 0xff;
+	int		percentage     = -1;
+	int             time_units     = -1;
+	char            *units         = "?";
+
+	p = buf;
+
+	if ((num_online_cpus() == 1) &&
+	    !(error = apm_get_power_status(&bx, &cx, &dx))) {
+		ac_line_status = (bx >> 8) & 0xff;
+		battery_status = bx & 0xff;
+		if ((cx & 0xff) != 0xff)
+			percentage = cx & 0xff;
+
+		if (apm_info.connection_version > 0x100) {
+			battery_flag = (cx >> 8) & 0xff;
+			if (dx != 0xffff) {
+				units = (dx & 0x8000) ? "min" : "sec";
+				time_units = dx & 0x7fff;
+			}
+		}
+	}
+	/* Arguments, with symbols from linux/apm_bios.h.  Information is
+	   from the Get Power Status (0x0a) call unless otherwise noted.
+
+	   0) Linux driver version (this will change if format changes)
+	   1) APM BIOS Version.  Usually 1.0, 1.1 or 1.2.
+	   2) APM flags from APM Installation Check (0x00):
+	      bit 0: APM_16_BIT_SUPPORT
+	      bit 1: APM_32_BIT_SUPPORT
+	      bit 2: APM_IDLE_SLOWS_CLOCK
+	      bit 3: APM_BIOS_DISABLED
+	      bit 4: APM_BIOS_DISENGAGED
+	   3) AC line status
+	      0x00: Off-line
+	      0x01: On-line
+	      0x02: On backup power (BIOS >= 1.1 only)
+	      0xff: Unknown
+	   4) Battery status
+	      0x00: High
+	      0x01: Low
+	      0x02: Critical
+	      0x03: Charging
+	      0x04: Selected battery not present (BIOS >= 1.2 only)
+	      0xff: Unknown
+	   5) Battery flag
+	      bit 0: High
+	      bit 1: Low
+	      bit 2: Critical
+	      bit 3: Charging
+	      bit 7: No system battery
+	      0xff: Unknown
+	   6) Remaining battery life (percentage of charge):
+	      0-100: valid
+	      -1: Unknown
+	   7) Remaining battery life (time units):
+	      Number of remaining minutes or seconds
+	      -1: Unknown
+	   8) min = minutes; sec = seconds */
+
+	p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
+		     driver_version,
+		     (apm_info.bios.version >> 8) & 0xff,
+		     apm_info.bios.version & 0xff,
+		     apm_info.bios.flags,
+		     ac_line_status,
+		     battery_status,
+		     battery_flag,
+		     percentage,
+		     time_units,
+		     units);
+
+	return p - buf;
+}
+
+static int apm(void *unused)
+{
+	unsigned short	bx;
+	unsigned short	cx;
+	unsigned short	dx;
+	int		error;
+	char *		power_stat;
+	char *		bat_stat;
+
+	kapmd_running = 1;
+
+	daemonize("kapmd");
+
+	current->flags |= PF_NOFREEZE;
+
+#ifdef CONFIG_SMP
+	/* 2002/08/01 - WT
+	 * This is to avoid random crashes at boot time during initialization
+	 * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
+	 * Some bioses don't like being called from CPU != 0.
+	 * Method suggested by Ingo Molnar.
+	 */
+	set_cpus_allowed(current, cpumask_of_cpu(0));
+	BUG_ON(smp_processor_id() != 0);
+#endif
+
+	if (apm_info.connection_version == 0) {
+		apm_info.connection_version = apm_info.bios.version;
+		if (apm_info.connection_version > 0x100) {
+			/*
+			 * We only support BIOSs up to version 1.2
+			 */
+			if (apm_info.connection_version > 0x0102)
+				apm_info.connection_version = 0x0102;
+			error = apm_driver_version(&apm_info.connection_version);
+			if (error != APM_SUCCESS) {
+				apm_error("driver version", error);
+				/* Fall back to an APM 1.0 connection. */
+				apm_info.connection_version = 0x100;
+			}
+		}
+	}
+
+	if (debug)
+		printk(KERN_INFO "apm: Connection version %d.%d\n",
+			(apm_info.connection_version >> 8) & 0xff,
+			apm_info.connection_version & 0xff);
+
+#ifdef CONFIG_APM_DO_ENABLE
+	if (apm_info.bios.flags & APM_BIOS_DISABLED) {
+		/*
+		 * This call causes my NEC UltraLite Versa 33/C to hang if it
+		 * is booted with PM disabled but not in the docking station.
+		 * Unfortunate ...
+		 */
+		error = apm_enable_power_management(1);
+		if (error) {
+			apm_error("enable power management", error);
+			return -1;
+		}
+	}
+#endif
+
+	if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
+	    && (apm_info.connection_version > 0x0100)) {
+		error = apm_engage_power_management(APM_DEVICE_ALL, 1);
+		if (error) {
+			apm_error("engage power management", error);
+			return -1;
+		}
+	}
+
+	if (debug && (num_online_cpus() == 1 || smp )) {
+		error = apm_get_power_status(&bx, &cx, &dx);
+		if (error)
+			printk(KERN_INFO "apm: power status not available\n");
+		else {
+			switch ((bx >> 8) & 0xff) {
+			case 0: power_stat = "off line"; break;
+			case 1: power_stat = "on line"; break;
+			case 2: power_stat = "on backup power"; break;
+			default: power_stat = "unknown"; break;
+			}
+			switch (bx & 0xff) {
+			case 0: bat_stat = "high"; break;
+			case 1: bat_stat = "low"; break;
+			case 2: bat_stat = "critical"; break;
+			case 3: bat_stat = "charging"; break;
+			default: bat_stat = "unknown"; break;
+			}
+			printk(KERN_INFO
+			       "apm: AC %s, battery status %s, battery life ",
+			       power_stat, bat_stat);
+			if ((cx & 0xff) == 0xff)
+				printk("unknown\n");
+			else
+				printk("%d%%\n", cx & 0xff);
+			if (apm_info.connection_version > 0x100) {
+				printk(KERN_INFO
+				       "apm: battery flag 0x%02x, battery life ",
+				       (cx >> 8) & 0xff);
+				if (dx == 0xffff)
+					printk("unknown\n");
+				else
+					printk("%d %s\n", dx & 0x7fff,
+						(dx & 0x8000) ?
+						"minutes" : "seconds");
+			}
+		}
+	}
+
+	/* Install our power off handler.. */
+	if (power_off)
+		pm_power_off = apm_power_off;
+
+	if (num_online_cpus() == 1 || smp) {
+#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
+		console_blank_hook = apm_console_blank;
+#endif
+		apm_mainloop();
+#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
+		console_blank_hook = NULL;
+#endif
+	}
+	kapmd_running = 0;
+
+	return 0;
+}
+
+#ifndef MODULE
+static int __init apm_setup(char *str)
+{
+	int	invert;
+
+	while ((str != NULL) && (*str != '\0')) {
+		if (strncmp(str, "off", 3) == 0)
+			apm_disabled = 1;
+		if (strncmp(str, "on", 2) == 0)
+			apm_disabled = 0;
+		if ((strncmp(str, "bounce-interval=", 16) == 0) ||
+		    (strncmp(str, "bounce_interval=", 16) == 0))
+			bounce_interval = simple_strtol(str + 16, NULL, 0);
+		if ((strncmp(str, "idle-threshold=", 15) == 0) ||
+		    (strncmp(str, "idle_threshold=", 15) == 0))
+			idle_threshold = simple_strtol(str + 15, NULL, 0);
+		if ((strncmp(str, "idle-period=", 12) == 0) ||
+		    (strncmp(str, "idle_period=", 12) == 0))
+			idle_period = simple_strtol(str + 12, NULL, 0);
+		invert = (strncmp(str, "no-", 3) == 0) ||
+			(strncmp(str, "no_", 3) == 0);
+		if (invert)
+			str += 3;
+		if (strncmp(str, "debug", 5) == 0)
+			debug = !invert;
+		if ((strncmp(str, "power-off", 9) == 0) ||
+		    (strncmp(str, "power_off", 9) == 0))
+			power_off = !invert;
+		if (strncmp(str, "smp", 3) == 0)
+		{
+			smp = !invert;
+			idle_threshold = 100;
+		}
+		if ((strncmp(str, "allow-ints", 10) == 0) ||
+		    (strncmp(str, "allow_ints", 10) == 0))
+ 			apm_info.allow_ints = !invert;
+		if ((strncmp(str, "broken-psr", 10) == 0) ||
+		    (strncmp(str, "broken_psr", 10) == 0))
+			apm_info.get_power_status_broken = !invert;
+		if ((strncmp(str, "realmode-power-off", 18) == 0) ||
+		    (strncmp(str, "realmode_power_off", 18) == 0))
+			apm_info.realmode_power_off = !invert;
+		str = strchr(str, ',');
+		if (str != NULL)
+			str += strspn(str, ", \t");
+	}
+	return 1;
+}
+
+__setup("apm=", apm_setup);
+#endif
+
+static struct file_operations apm_bios_fops = {
+	.owner		= THIS_MODULE,
+	.read		= do_read,
+	.poll		= do_poll,
+	.ioctl		= do_ioctl,
+	.open		= do_open,
+	.release	= do_release,
+};
+
+static struct miscdevice apm_device = {
+	APM_MINOR_DEV,
+	"apm_bios",
+	&apm_bios_fops
+};
+
+
+/* Simple "print if true" callback */
+static int __init print_if_true(struct dmi_system_id *d)
+{
+	printk("%s\n", d->ident);
+	return 0;
+}
+
+/*
+ * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was
+ * disabled before the suspend. Linux used to get terribly confused by that.
+ */
+static int __init broken_ps2_resume(struct dmi_system_id *d)
+{
+	printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
+	return 0;
+}
+
+/* Some bioses have a broken protected mode poweroff and need to use realmode */
+static int __init set_realmode_power_off(struct dmi_system_id *d)
+{
+	if (apm_info.realmode_power_off == 0) {
+		apm_info.realmode_power_off = 1;
+		printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
+	}
+	return 0;
+}
+
+/* Some laptops require interrupts to be enabled during APM calls */
+static int __init set_apm_ints(struct dmi_system_id *d)
+{
+	if (apm_info.allow_ints == 0) {
+		apm_info.allow_ints = 1;
+		printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
+	}
+	return 0;
+}
+
+/* Some APM bioses corrupt memory or just plain do not work */
+static int __init apm_is_horked(struct dmi_system_id *d)
+{
+	if (apm_info.disabled == 0) {
+		apm_info.disabled = 1;
+		printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+	}
+	return 0;
+}
+
+static int __init apm_is_horked_d850md(struct dmi_system_id *d)
+{
+	if (apm_info.disabled == 0) {
+		apm_info.disabled = 1;
+		printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+		printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
+		printk(KERN_INFO "download from support.intel.com \n");
+	}
+	return 0;
+}
+
+/* Some APM bioses hang on APM idle calls */
+static int __init apm_likes_to_melt(struct dmi_system_id *d)
+{
+	if (apm_info.forbid_idle == 0) {
+		apm_info.forbid_idle = 1;
+		printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
+	}
+	return 0;
+}
+
+/*
+ *  Check for clue free BIOS implementations who use
+ *  the following QA technique
+ *
+ *      [ Write BIOS Code ]<------
+ *               |                ^
+ *      < Does it Compile >----N--
+ *               |Y               ^
+ *	< Does it Boot Win98 >-N--
+ *               |Y
+ *           [Ship It]
+ *
+ *	Phoenix A04  08/24/2000 is known bad (Dell Inspiron 5000e)
+ *	Phoenix A07  09/29/2000 is known good (Dell Inspiron 5000)
+ */
+static int __init broken_apm_power(struct dmi_system_id *d)
+{
+	apm_info.get_power_status_broken = 1;
+	printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
+	return 0;
+}
+
+/*
+ * This bios swaps the APM minute reporting bytes over (Many sony laptops
+ * have this problem).
+ */
+static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
+{
+	apm_info.get_power_status_swabinminutes = 1;
+	printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
+	return 0;
+}
+
+static struct dmi_system_id __initdata apm_dmi_table[] = {
+	{
+		print_if_true,
+		KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
+	},
+	{	/* Handle problems with APM on the C600 */
+		broken_ps2_resume, "Dell Latitude C600",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
+	},
+	{	/* Allow interrupts during suspend on Dell Latitude laptops*/
+		set_apm_ints, "Dell Latitude",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Dell Inspiron 2500",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
+			DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+	},
+	{	/* Allow interrupts during suspend on Dell Inspiron laptops*/
+		set_apm_ints, "Dell Inspiron", {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
+	},
+	{	/* Handle problems with APM on Inspiron 5000e */
+		broken_apm_power, "Dell Inspiron 5000e",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "A04"),
+			DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
+	},
+	{	/* Handle problems with APM on Inspiron 2500 */
+		broken_apm_power, "Dell Inspiron 2500",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "A12"),
+			DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Dell Dimension 4100",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
+			DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
+			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+	},
+	{	/* Allow interrupts during suspend on Compaq Laptops*/
+		set_apm_ints, "Compaq 12XL125",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
+	},
+	{	/* Allow interrupts during APM or the clock goes slow */
+		set_apm_ints, "ASUSTeK",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
+	},
+	{	/* APM blows on shutdown */
+		apm_is_horked, "ABIT KX7-333[R]",
+		{	DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
+			DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Trigem Delhi3",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Fujitsu-Siemens",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
+			DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked_d850md, "Intel D850MD",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Intel D810EMO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Dell XPS-Z",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_BIOS_VERSION, "A11"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Sharp PC-PJ/AX",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
+			DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
+			DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
+	},
+	{	/* APM crashes */
+		apm_is_horked, "Dell Inspiron 2500",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
+			DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+	},
+	{	/* APM idle hangs */
+		apm_likes_to_melt, "Jabil AMD",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+			DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
+	},
+	{	/* APM idle hangs */
+		apm_likes_to_melt, "AMI Bios",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+			DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
+			DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-N505VX */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
+			DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-XG29 */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
+			DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z600NE */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
+			DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z600NE */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
+			DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
+			DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z505LS */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
+			DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z505LS */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
+			DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
+			DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-F104K */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
+			DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
+	},
+
+	{	/* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
+			DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-C1VE */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
+			DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
+	},
+	{	/* Handle problems with APM on Sony Vaio PCG-C1VE */
+		swab_apm_power_in_minutes, "Sony VAIO",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
+			DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
+	},
+	{	/* broken PM poweroff bios */
+		set_realmode_power_off, "Award Software v4.60 PGMA",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
+			DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
+			DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
+	},
+
+	/* Generic per vendor APM settings  */
+
+	{	/* Allow interrupts during suspend on IBM laptops */
+		set_apm_ints, "IBM",
+		{	DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
+	},
+
+	{ }
+};
+
+/*
+ * Just start the APM thread. We do NOT want to do APM BIOS
+ * calls from anything but the APM thread, if for no other reason
+ * than the fact that we don't trust the APM BIOS. This way,
+ * most common APM BIOS problems that lead to protection errors
+ * etc will have at least some level of being contained...
+ *
+ * In short, if something bad happens, at least we have a choice
+ * of just killing the apm thread..
+ */
+static int __init apm_init(void)
+{
+	struct proc_dir_entry *apm_proc;
+	int ret;
+	int i;
+
+	dmi_check_system(apm_dmi_table);
+
+	if (apm_info.bios.version == 0) {
+		printk(KERN_INFO "apm: BIOS not found.\n");
+		return -ENODEV;
+	}
+	printk(KERN_INFO
+		"apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
+		((apm_info.bios.version >> 8) & 0xff),
+		(apm_info.bios.version & 0xff),
+		apm_info.bios.flags,
+		driver_version);
+	if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
+		printk(KERN_INFO "apm: no 32 bit BIOS support\n");
+		return -ENODEV;
+	}
+
+	if (allow_ints)
+		apm_info.allow_ints = 1;
+	if (broken_psr)
+		apm_info.get_power_status_broken = 1;
+	if (realmode_power_off)
+		apm_info.realmode_power_off = 1;
+	/* User can override, but default is to trust DMI */
+	if (apm_disabled != -1)
+		apm_info.disabled = apm_disabled;
+
+	/*
+	 * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
+	 * but is reportedly a 1.0 BIOS.
+	 */
+	if (apm_info.bios.version == 0x001)
+		apm_info.bios.version = 0x100;
+
+	/* BIOS < 1.2 doesn't set cseg_16_len */
+	if (apm_info.bios.version < 0x102)
+		apm_info.bios.cseg_16_len = 0; /* 64k */
+
+	if (debug) {
+		printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
+			apm_info.bios.cseg, apm_info.bios.offset,
+			apm_info.bios.cseg_16, apm_info.bios.dseg);
+		if (apm_info.bios.version > 0x100)
+			printk(" cseg len %x, dseg len %x",
+				apm_info.bios.cseg_len,
+				apm_info.bios.dseg_len);
+		if (apm_info.bios.version > 0x101)
+			printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
+		printk("\n");
+	}
+
+	if (apm_info.disabled) {
+		printk(KERN_NOTICE "apm: disabled on user request.\n");
+		return -ENODEV;
+	}
+	if ((num_online_cpus() > 1) && !power_off && !smp) {
+		printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+		apm_info.disabled = 1;
+		return -ENODEV;
+	}
+	if (PM_IS_ACTIVE()) {
+		printk(KERN_NOTICE "apm: overridden by ACPI.\n");
+		apm_info.disabled = 1;
+		return -ENODEV;
+	}
+	pm_active = 1;
+
+	/*
+	 * Set up a segment that references the real mode segment 0x40
+	 * that extends up to the end of page zero (that we have reserved).
+	 * This is for buggy BIOS's that refer to (real mode) segment 0x40
+	 * even though they are called in protected mode.
+	 */
+	set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
+	_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
+
+	apm_bios_entry.offset = apm_info.bios.offset;
+	apm_bios_entry.segment = APM_CS;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3],
+			 __va((unsigned long)apm_info.bios.cseg << 4));
+		set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3],
+			 __va((unsigned long)apm_info.bios.cseg_16 << 4));
+		set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3],
+			 __va((unsigned long)apm_info.bios.dseg << 4));
+#ifndef APM_RELAX_SEGMENTS
+		if (apm_info.bios.version == 0x100) {
+#endif
+			/* For ASUS motherboard, Award BIOS rev 110 (and others?) */
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1);
+			/* For some unknown machine. */
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1);
+			/* For the DEC Hinote Ultra CT475 (and others?) */
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1);
+#ifndef APM_RELAX_SEGMENTS
+		} else {
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3],
+				(apm_info.bios.cseg_len - 1) & 0xffff);
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3],
+				(apm_info.bios.cseg_16_len - 1) & 0xffff);
+			_set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3],
+				(apm_info.bios.dseg_len - 1) & 0xffff);
+		      /* workaround for broken BIOSes */
+	                if (apm_info.bios.cseg_len <= apm_info.bios.offset)
+        	                _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1);
+                       if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
+                        	/* for the BIOS that assumes granularity = 1 */
+                        	per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000;
+                        	printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
+        	        }
+		}
+#endif
+	}
+
+	apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
+	if (apm_proc)
+		apm_proc->owner = THIS_MODULE;
+
+	ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
+	if (ret < 0) {
+		printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
+		return -ENOMEM;
+	}
+
+	if (num_online_cpus() > 1 && !smp ) {
+		printk(KERN_NOTICE
+		   "apm: disabled - APM is not SMP safe (power off active).\n");
+		return 0;
+	}
+
+	misc_register(&apm_device);
+
+	if (HZ != 100)
+		idle_period = (idle_period * HZ) / 100;
+	if (idle_threshold < 100) {
+		original_pm_idle = pm_idle;
+		pm_idle  = apm_cpu_idle;
+		set_pm_idle = 1;
+	}
+
+	return 0;
+}
+
+static void __exit apm_exit(void)
+{
+	int	error;
+
+	if (set_pm_idle) {
+		pm_idle = original_pm_idle;
+		/*
+		 * We are about to unload the current idle thread pm callback
+		 * (pm_idle), Wait for all processors to update cached/local
+		 * copies of pm_idle before proceeding.
+		 */
+		cpu_idle_wait();
+	}
+	if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
+	    && (apm_info.connection_version > 0x0100)) {
+		error = apm_engage_power_management(APM_DEVICE_ALL, 0);
+		if (error)
+			apm_error("disengage power management", error);
+	}
+	misc_deregister(&apm_device);
+	remove_proc_entry("apm", NULL);
+	if (power_off)
+		pm_power_off = NULL;
+	exit_kapmd = 1;
+	while (kapmd_running)
+		schedule();
+	pm_active = 0;
+}
+
+module_init(apm_init);
+module_exit(apm_exit);
+
+MODULE_AUTHOR("Stephen Rothwell");
+MODULE_DESCRIPTION("Advanced Power Management");
+MODULE_LICENSE("GPL");
+module_param(debug, bool, 0644);
+MODULE_PARM_DESC(debug, "Enable debug mode");
+module_param(power_off, bool, 0444);
+MODULE_PARM_DESC(power_off, "Enable power off");
+module_param(bounce_interval, int, 0444);
+MODULE_PARM_DESC(bounce_interval,
+		"Set the number of ticks to ignore suspend bounces");
+module_param(allow_ints, bool, 0444);
+MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
+module_param(broken_psr, bool, 0444);
+MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
+module_param(realmode_power_off, bool, 0444);
+MODULE_PARM_DESC(realmode_power_off,
+		"Switch to real mode before powering off");
+module_param(idle_threshold, int, 0444);
+MODULE_PARM_DESC(idle_threshold,
+	"System idle percentage above which to make APM BIOS idle calls");
+module_param(idle_period, int, 0444);
+MODULE_PARM_DESC(idle_period,
+	"Period (in sec/100) over which to caculate the idle percentage");
+module_param(smp, bool, 0444);
+MODULE_PARM_DESC(smp,
+	"Set this to enable APM use on an SMP platform. Use with caution on older systems");
+MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
new file mode 100644
index 00000000000..36d66e2077d
--- /dev/null
+++ b/arch/i386/kernel/asm-offsets.c
@@ -0,0 +1,72 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <asm/ucontext.h>
+#include "sigframe.h"
+#include <asm/fixmap.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+#define DEFINE(sym, val) \
+        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(sym, str, mem) \
+	DEFINE(sym, offsetof(struct str, mem));
+
+void foo(void)
+{
+	OFFSET(SIGCONTEXT_eax, sigcontext, eax);
+	OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
+	OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
+	OFFSET(SIGCONTEXT_edx, sigcontext, edx);
+	OFFSET(SIGCONTEXT_esi, sigcontext, esi);
+	OFFSET(SIGCONTEXT_edi, sigcontext, edi);
+	OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
+	OFFSET(SIGCONTEXT_esp, sigcontext, esp);
+	OFFSET(SIGCONTEXT_eip, sigcontext, eip);
+	BLANK();
+
+	OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
+	OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
+	OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
+	OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+	OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
+	OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
+	OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
+	OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
+	BLANK();
+
+	OFFSET(TI_task, thread_info, task);
+	OFFSET(TI_exec_domain, thread_info, exec_domain);
+	OFFSET(TI_flags, thread_info, flags);
+	OFFSET(TI_status, thread_info, status);
+	OFFSET(TI_cpu, thread_info, cpu);
+	OFFSET(TI_preempt_count, thread_info, preempt_count);
+	OFFSET(TI_addr_limit, thread_info, addr_limit);
+	OFFSET(TI_restart_block, thread_info, restart_block);
+	BLANK();
+
+	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
+	OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+	BLANK();
+
+	OFFSET(pbe_address, pbe, address);
+	OFFSET(pbe_orig_address, pbe, orig_address);
+	OFFSET(pbe_next, pbe, next);
+
+	/* Offset from the sysenter stack to tss.esp0 */
+	DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+		 sizeof(struct tss_struct));
+
+	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+}
diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c
new file mode 100644
index 00000000000..4c30ed01f4e
--- /dev/null
+++ b/arch/i386/kernel/bootflag.c
@@ -0,0 +1,99 @@
+/*
+ *	Implement 'Simple Boot Flag Specification 2.0'
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+
+#include <linux/mc146818rtc.h>
+
+
+#define SBF_RESERVED (0x78)
+#define SBF_PNPOS    (1<<0)
+#define SBF_BOOTING  (1<<1)
+#define SBF_DIAG     (1<<2)
+#define SBF_PARITY   (1<<7)
+
+
+int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
+
+
+static int __init parity(u8 v)
+{
+	int x = 0;
+	int i;
+	
+	for(i=0;i<8;i++)
+	{
+		x^=(v&1);
+		v>>=1;
+	}
+	return x;
+}
+
+static void __init sbf_write(u8 v)
+{
+	unsigned long flags;
+	if(sbf_port != -1)
+	{
+		v &= ~SBF_PARITY;
+		if(!parity(v))
+			v|=SBF_PARITY;
+
+		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v);
+
+		spin_lock_irqsave(&rtc_lock, flags);
+		CMOS_WRITE(v, sbf_port);
+		spin_unlock_irqrestore(&rtc_lock, flags);
+	}
+}
+
+static u8 __init sbf_read(void)
+{
+	u8 v;
+	unsigned long flags;
+	if(sbf_port == -1)
+		return 0;
+	spin_lock_irqsave(&rtc_lock, flags);
+	v = CMOS_READ(sbf_port);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return v;
+}
+
+static int __init sbf_value_valid(u8 v)
+{
+	if(v&SBF_RESERVED)		/* Reserved bits */
+		return 0;
+	if(!parity(v))
+		return 0;
+	return 1;
+}
+
+static int __init sbf_init(void)
+{
+	u8 v;
+	if(sbf_port == -1)
+		return 0;
+	v = sbf_read();
+	if(!sbf_value_valid(v))
+		printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v);
+
+	v &= ~SBF_RESERVED;
+	v &= ~SBF_BOOTING;
+	v &= ~SBF_DIAG;
+#if defined(CONFIG_ISAPNP)
+	v |= SBF_PNPOS;
+#endif
+	sbf_write(v);
+	return 0;
+}
+
+module_init(sbf_init);
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
new file mode 100644
index 00000000000..010aecfffbc
--- /dev/null
+++ b/arch/i386/kernel/cpu/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for x86-compatible CPU details and quirks
+#
+
+obj-y	:=	common.o proc.o
+
+obj-y	+=	amd.o
+obj-y	+=	cyrix.o
+obj-y	+=	centaur.o
+obj-y	+=	transmeta.o
+obj-y	+=	intel.o intel_cacheinfo.o
+obj-y	+=	rise.o
+obj-y	+=	nexgen.o
+obj-y	+=	umc.o
+
+obj-$(CONFIG_X86_MCE)	+=	mcheck/
+
+obj-$(CONFIG_MTRR)	+= 	mtrr/
+obj-$(CONFIG_CPU_FREQ)	+=	cpufreq/
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
new file mode 100644
index 00000000000..ae94585d044
--- /dev/null
+++ b/arch/i386/kernel/cpu/amd.c
@@ -0,0 +1,249 @@
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+/*
+ *	B step AMD K6 before B 9730xxxx have hardware bugs that can cause
+ *	misexecution of code under Linux. Owners of such processors should
+ *	contact AMD for precise details and a CPU swap.
+ *
+ *	See	http://www.multimania.com/poulot/k6bug.html
+ *		http://www.amd.com/K6/k6docs/revgd.html
+ *
+ *	The following test is erm.. interesting. AMD neglected to up
+ *	the chip setting when fixing the bug but they also tweaked some
+ *	performance at the same time..
+ */
+ 
+extern void vide(void);
+__asm__(".align 4\nvide: ret");
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int mbytes = num_physpages >> (20-PAGE_SHIFT);
+	int r;
+
+	/*
+	 *	FIXME: We should handle the K5 here. Set up the write
+	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
+	 *	no bus pipeline)
+	 */
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+	
+	r = get_model_name(c);
+
+	switch(c->x86)
+	{
+		case 4:
+		/*
+		 * General Systems BIOSen alias the cpu frequency registers
+		 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+		 * drivers subsequently pokes it, and changes the CPU speed.
+		 * Workaround : Remove the unneeded alias.
+		 */
+#define CBAR		(0xfffc) /* Configuration Base Address  (32-bit) */
+#define CBAR_ENB	(0x80000000)
+#define CBAR_KEY	(0X000000CB)
+			if (c->x86_model==9 || c->x86_model == 10) {
+				if (inl (CBAR) & CBAR_ENB)
+					outl (0 | CBAR_KEY, CBAR);
+			}
+			break;
+		case 5:
+			if( c->x86_model < 6 )
+			{
+				/* Based on AMD doc 20734R - June 2000 */
+				if ( c->x86_model == 0 ) {
+					clear_bit(X86_FEATURE_APIC, c->x86_capability);
+					set_bit(X86_FEATURE_PGE, c->x86_capability);
+				}
+				break;
+			}
+			
+			if ( c->x86_model == 6 && c->x86_mask == 1 ) {
+				const int K6_BUG_LOOP = 1000000;
+				int n;
+				void (*f_vide)(void);
+				unsigned long d, d2;
+				
+				printk(KERN_INFO "AMD K6 stepping B detected - ");
+				
+				/*
+				 * It looks like AMD fixed the 2.6.2 bug and improved indirect 
+				 * calls at the same time.
+				 */
+
+				n = K6_BUG_LOOP;
+				f_vide = vide;
+				rdtscl(d);
+				while (n--) 
+					f_vide();
+				rdtscl(d2);
+				d = d2-d;
+				
+				/* Knock these two lines out if it debugs out ok */
+				printk(KERN_INFO "AMD K6 stepping B detected - ");
+				/* -- cut here -- */
+				if (d > 20*K6_BUG_LOOP) 
+					printk("system stability may be impaired when more than 32 MB are used.\n");
+				else 
+					printk("probably OK (after B9730xxxx).\n");
+				printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
+			}
+
+			/* K6 with old style WHCR */
+			if (c->x86_model < 8 ||
+			   (c->x86_model== 8 && c->x86_mask < 8)) {
+				/* We can only write allocate on the low 508Mb */
+				if(mbytes>508)
+					mbytes=508;
+
+				rdmsr(MSR_K6_WHCR, l, h);
+				if ((l&0x0000FFFF)==0) {
+					unsigned long flags;
+					l=(1<<0)|((mbytes/4)<<1);
+					local_irq_save(flags);
+					wbinvd();
+					wrmsr(MSR_K6_WHCR, l, h);
+					local_irq_restore(flags);
+					printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+						mbytes);
+				}
+				break;
+			}
+
+			if ((c->x86_model == 8 && c->x86_mask >7) ||
+			     c->x86_model == 9 || c->x86_model == 13) {
+				/* The more serious chips .. */
+
+				if(mbytes>4092)
+					mbytes=4092;
+
+				rdmsr(MSR_K6_WHCR, l, h);
+				if ((l&0xFFFF0000)==0) {
+					unsigned long flags;
+					l=((mbytes>>2)<<22)|(1<<16);
+					local_irq_save(flags);
+					wbinvd();
+					wrmsr(MSR_K6_WHCR, l, h);
+					local_irq_restore(flags);
+					printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+						mbytes);
+				}
+
+				/*  Set MTRR capability flag if appropriate */
+				if (c->x86_model == 13 || c->x86_model == 9 ||
+				   (c->x86_model == 8 && c->x86_mask >= 8))
+					set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
+				break;
+			}
+			break;
+
+		case 6: /* An Athlon/Duron */
+ 
+			/* Bit 15 of Athlon specific MSR 15, needs to be 0
+ 			 * to enable SSE on Palomino/Morgan/Barton CPU's.
+			 * If the BIOS didn't enable it already, enable it here.
+			 */
+			if (c->x86_model >= 6 && c->x86_model <= 10) {
+				if (!cpu_has(c, X86_FEATURE_XMM)) {
+					printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+					rdmsr(MSR_K7_HWCR, l, h);
+					l &= ~0x00008000;
+					wrmsr(MSR_K7_HWCR, l, h);
+					set_bit(X86_FEATURE_XMM, c->x86_capability);
+				}
+			}
+
+			/* It's been determined by AMD that Athlons since model 8 stepping 1
+			 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
+			 * As per AMD technical note 27212 0.2
+			 */
+			if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
+				rdmsr(MSR_K7_CLK_CTL, l, h);
+				if ((l & 0xfff00000) != 0x20000000) {
+					printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
+						((l & 0x000fffff)|0x20000000));
+					wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
+				}
+			}
+			break;
+	}
+
+	switch (c->x86) {
+	case 15:
+		set_bit(X86_FEATURE_K8, c->x86_capability);
+		break;
+	case 6:
+		set_bit(X86_FEATURE_K7, c->x86_capability); 
+		break;
+	}
+
+	display_cacheinfo(c);
+	detect_ht(c);
+
+#ifdef CONFIG_X86_HT
+	/* AMD dual core looks like HT but isn't really. Hide it from the
+	   scheduler. This works around problems with the domain scheduler.
+	   Also probably gives slightly better scheduling and disables
+	   SMT nice which is harmful on dual core.
+	   TBD tune the domain scheduler for dual core. */
+	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+		smp_num_siblings = 1;
+#endif
+
+	if (cpuid_eax(0x80000000) >= 0x80000008) {
+		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_num_cores & (c->x86_num_cores - 1))
+			c->x86_num_cores = 1;
+	}
+}
+
+static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* AMD errata T13 (order #21922) */
+	if ((c->x86 == 6)) {
+		if (c->x86_model == 3 && c->x86_mask == 0)	/* Duron Rev A0 */
+			size = 64;
+		if (c->x86_model == 4 &&
+		    (c->x86_mask==0 || c->x86_mask==1))	/* Tbird rev A1/A2 */
+			size = 256;
+	}
+	return size;
+}
+
+static struct cpu_dev amd_cpu_dev __initdata = {
+	.c_vendor	= "AMD",
+	.c_ident 	= { "AuthenticAMD" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
+		  {
+			  [3] = "486 DX/2",
+			  [7] = "486 DX/2-WB",
+			  [8] = "486 DX/4", 
+			  [9] = "486 DX/4-WB", 
+			  [14] = "Am5x86-WT",
+			  [15] = "Am5x86-WB" 
+		  }
+		},
+	},
+	.c_init		= init_amd,
+	.c_identify	= generic_identify,
+	.c_size_cache	= amd_size_cache,
+};
+
+int __init amd_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(amd_init_cpu);
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
new file mode 100644
index 00000000000..394814e5767
--- /dev/null
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -0,0 +1,476 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/e820.h>
+#include "cpu.h"
+
+#ifdef CONFIG_X86_OOSTORE
+
+static u32 __init power2(u32 x)
+{
+	u32 s=1;
+	while(s<=x)
+		s<<=1;
+	return s>>=1;
+}
+
+
+/*
+ *	Set up an actual MCR
+ */
+ 
+static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+{
+	u32 lo, hi;
+	
+	hi = base & ~0xFFF;
+	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
+	lo &= ~0xFFF;		/* Remove the ctrl value bits */
+	lo |= key;		/* Attribute we wish to set */
+	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
+	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
+}
+
+/*
+ *	Figure what we can cover with MCR's
+ *
+ *	Shortcut: We know you can't put 4Gig of RAM on a winchip
+ */
+
+static u32 __init ramtop(void)		/* 16388 */
+{
+	int i;
+	u32 top = 0;
+	u32 clip = 0xFFFFFFFFUL;
+	
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long start, end;
+
+		if (e820.map[i].addr > 0xFFFFFFFFUL)
+			continue;
+		/*
+		 *	Don't MCR over reserved space. Ignore the ISA hole
+		 *	we frob around that catastrophy already
+		 */
+		 			
+		if (e820.map[i].type == E820_RESERVED)
+		{
+			if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
+				clip = e820.map[i].addr;
+			continue;
+		}
+		start = e820.map[i].addr;
+		end = e820.map[i].addr + e820.map[i].size;
+		if (start >= end)
+			continue;
+		if (end > top)
+			top = end;
+	}
+	/* Everything below 'top' should be RAM except for the ISA hole.
+	   Because of the limited MCR's we want to map NV/ACPI into our
+	   MCR range for gunk in RAM 
+	   
+	   Clip might cause us to MCR insufficient RAM but that is an
+	   acceptable failure mode and should only bite obscure boxes with
+	   a VESA hole at 15Mb
+	   
+	   The second case Clip sometimes kicks in is when the EBDA is marked
+	   as reserved. Again we fail safe with reasonable results
+	*/
+	
+	if(top>clip)
+		top=clip;
+		
+	return top;
+}
+
+/*
+ *	Compute a set of MCR's to give maximum coverage
+ */
+
+static int __init centaur_mcr_compute(int nr, int key)
+{
+	u32 mem = ramtop();
+	u32 root = power2(mem);
+	u32 base = root;
+	u32 top = root;
+	u32 floor = 0;
+	int ct = 0;
+	
+	while(ct<nr)
+	{
+		u32 fspace = 0;
+
+		/*
+		 *	Find the largest block we will fill going upwards
+		 */
+
+		u32 high = power2(mem-top);	
+
+		/*
+		 *	Find the largest block we will fill going downwards
+		 */
+
+		u32 low = base/2;
+
+		/*
+		 *	Don't fill below 1Mb going downwards as there
+		 *	is an ISA hole in the way.
+		 */		
+		 
+		if(base <= 1024*1024)
+			low = 0;
+			
+		/*
+		 *	See how much space we could cover by filling below
+		 *	the ISA hole
+		 */
+		 
+		if(floor == 0)
+			fspace = 512*1024;
+		else if(floor ==512*1024)
+			fspace = 128*1024;
+
+		/* And forget ROM space */
+		
+		/*
+		 *	Now install the largest coverage we get
+		 */
+		 
+		if(fspace > high && fspace > low)
+		{
+			centaur_mcr_insert(ct, floor, fspace, key);
+			floor += fspace;
+		}
+		else if(high > low)
+		{
+			centaur_mcr_insert(ct, top, high, key);
+			top += high;
+		}
+		else if(low > 0)
+		{
+			base -= low;
+			centaur_mcr_insert(ct, base, low, key);
+		}
+		else break;
+		ct++;
+	}
+	/*
+	 *	We loaded ct values. We now need to set the mask. The caller
+	 *	must do this bit.
+	 */
+	 
+	return ct;
+}
+
+static void __init centaur_create_optimal_mcr(void)
+{
+	int i;
+	/*
+	 *	Allocate up to 6 mcrs to mark as much of ram as possible
+	 *	as write combining and weak write ordered.
+	 *
+	 *	To experiment with: Linux never uses stack operations for 
+	 *	mmio spaces so we could globally enable stack operation wc
+	 *
+	 *	Load the registers with type 31 - full write combining, all
+	 *	writes weakly ordered.
+	 */
+	int used = centaur_mcr_compute(6, 31);
+
+	/*
+	 *	Wipe unused MCRs
+	 */
+	 
+	for(i=used;i<8;i++)
+		wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+static void __init winchip2_create_optimal_mcr(void)
+{
+	u32 lo, hi;
+	int i;
+
+	/*
+	 *	Allocate up to 6 mcrs to mark as much of ram as possible
+	 *	as write combining, weak store ordered.
+	 *
+	 *	Load the registers with type 25
+	 *		8	-	weak write ordering
+	 *		16	-	weak read ordering
+	 *		1	-	write combining
+	 */
+
+	int used = centaur_mcr_compute(6, 25);
+	
+	/*
+	 *	Mark the registers we are using.
+	 */
+	 
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	for(i=0;i<used;i++)
+		lo|=1<<(9+i);
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	
+	/*
+	 *	Wipe unused MCRs
+	 */
+	 
+	for(i=used;i<8;i++)
+		wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+/*
+ *	Handle the MCR key on the Winchip 2.
+ */
+
+static void __init winchip2_unprotect_mcr(void)
+{
+	u32 lo, hi;
+	u32 key;
+	
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	lo&=~0x1C0;	/* blank bits 8-6 */
+	key = (lo>>17) & 7;
+	lo |= key<<6;	/* replace with unlock key */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+static void __init winchip2_protect_mcr(void)
+{
+	u32 lo, hi;
+	
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	lo&=~0x1C0;	/* blank bits 8-6 */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+#endif /* CONFIG_X86_OOSTORE */
+
+#define ACE_PRESENT	(1 << 6)
+#define ACE_ENABLED	(1 << 7)
+#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */
+
+#define RNG_PRESENT	(1 << 2)
+#define RNG_ENABLED	(1 << 3)
+#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
+
+static void __init init_c3(struct cpuinfo_x86 *c)
+{
+	u32  lo, hi;
+
+	/* Test for Centaur Extended Feature Flags presence */
+	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+		u32 tmp = cpuid_edx(0xC0000001);
+
+		/* enable ACE unit, if present and disabled */
+		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+			rdmsr (MSR_VIA_FCR, lo, hi);
+			lo |= ACE_FCR;		/* enable ACE unit */
+			wrmsr (MSR_VIA_FCR, lo, hi);
+			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+		}
+
+		/* enable RNG unit, if present and disabled */
+		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+			rdmsr (MSR_VIA_RNG, lo, hi);
+			lo |= RNG_ENABLE;	/* enable RNG unit */
+			wrmsr (MSR_VIA_RNG, lo, hi);
+			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+		}
+
+		/* store Centaur Extended Feature Flags as
+		 * word 5 of the CPU capability bit array
+		 */
+		c->x86_capability[5] = cpuid_edx(0xC0000001);
+	}
+
+	/* Cyrix III family needs CX8 & PGE explicity enabled. */
+	if (c->x86_model >=6 && c->x86_model <= 9) {
+		rdmsr (MSR_VIA_FCR, lo, hi);
+		lo |= (1<<1 | 1<<7);
+		wrmsr (MSR_VIA_FCR, lo, hi);
+		set_bit(X86_FEATURE_CX8, c->x86_capability);
+	}
+
+	/* Before Nehemiah, the C3's had 3dNOW! */
+	if (c->x86_model >=6 && c->x86_model <9)
+		set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+
+	get_model_name(c);
+	display_cacheinfo(c);
+}
+
+static void __init init_centaur(struct cpuinfo_x86 *c)
+{
+	enum {
+		ECX8=1<<1,
+		EIERRINT=1<<2,
+		DPM=1<<3,
+		DMCE=1<<4,
+		DSTPCLK=1<<5,
+		ELINEAR=1<<6,
+		DSMC=1<<7,
+		DTLOCK=1<<8,
+		EDCTLB=1<<8,
+		EMMX=1<<9,
+		DPDC=1<<11,
+		EBRPRED=1<<12,
+		DIC=1<<13,
+		DDC=1<<14,
+		DNA=1<<15,
+		ERETSTK=1<<16,
+		E2MMX=1<<19,
+		EAMD3D=1<<20,
+	};
+
+	char *name;
+	u32  fcr_set=0;
+	u32  fcr_clr=0;
+	u32  lo,hi,newlo;
+	u32  aa,bb,cc,dd;
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+
+	switch (c->x86) {
+
+		case 5:
+			switch(c->x86_model) {
+			case 4:
+				name="C6";
+				fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+				fcr_clr=DPDC;
+				printk(KERN_NOTICE "Disabling bugged TSC.\n");
+				clear_bit(X86_FEATURE_TSC, c->x86_capability);
+#ifdef CONFIG_X86_OOSTORE
+				centaur_create_optimal_mcr();
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+					
+				   The C6 original lacks weak read order 
+				   
+				   Note 0x120 is write only on Winchip 1 */
+				   
+				wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
+#endif				
+				break;
+			case 8:
+				switch(c->x86_mask) {
+				default:
+					name="2";
+					break;
+				case 7 ... 9:
+					name="2A";
+					break;
+				case 10 ... 15:
+					name="2B";
+					break;
+				}
+				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+				fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+				winchip2_unprotect_mcr();
+				winchip2_create_optimal_mcr();
+				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+				*/
+				lo|=31;				
+				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				winchip2_protect_mcr();
+#endif
+				break;
+			case 9:
+				name="3";
+				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+				fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+				winchip2_unprotect_mcr();
+				winchip2_create_optimal_mcr();
+				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+				*/
+				lo|=31;				
+				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				winchip2_protect_mcr();
+#endif
+				break;
+			case 10:
+				name="4";
+				/* no info on the WC4 yet */
+				break;
+			default:
+				name="??";
+			}
+
+			rdmsr(MSR_IDT_FCR1, lo, hi);
+			newlo=(lo|fcr_set) & (~fcr_clr);
+
+			if (newlo!=lo) {
+				printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
+				wrmsr(MSR_IDT_FCR1, newlo, hi );
+			} else {
+				printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
+			}
+			/* Emulate MTRRs using Centaur's MCR. */
+			set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
+			/* Report CX8 */
+			set_bit(X86_FEATURE_CX8, c->x86_capability);
+			/* Set 3DNow! on Winchip 2 and above. */
+			if (c->x86_model >=8)
+				set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+			/* See if we can find out some more. */
+			if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
+				/* Yes, we can. */
+				cpuid(0x80000005,&aa,&bb,&cc,&dd);
+				/* Add L1 data and code cache sizes. */
+				c->x86_cache_size = (cc>>24)+(dd>>24);
+			}
+			sprintf( c->x86_model_id, "WinChip %s", name );
+			break;
+
+		case 6:
+			init_c3(c);
+			break;
+	}
+}
+
+static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* VIA C3 CPUs (670-68F) need further shifting. */
+	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
+		size >>= 8;
+
+	/* VIA also screwed up Nehemiah stepping 1, and made
+	   it return '65KB' instead of '64KB'
+	   - Note, it seems this may only be in engineering samples. */
+	if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
+		size -=1;
+
+	return size;
+}
+
+static struct cpu_dev centaur_cpu_dev __initdata = {
+	.c_vendor	= "Centaur",
+	.c_ident	= { "CentaurHauls" },
+	.c_init		= init_centaur,
+	.c_size_cache	= centaur_size_cache,
+};
+
+int __init centaur_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(centaur_init_cpu);
diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog
new file mode 100644
index 00000000000..cef76b80a71
--- /dev/null
+++ b/arch/i386/kernel/cpu/changelog
@@ -0,0 +1,63 @@
+/*
+ *  Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean
+ *  and Martin Mares, November 1997.
+ *
+ *  Force Cyrix 6x86(MX) and M II processors to report MTRR capability
+ *  and Cyrix "coma bug" recognition by
+ *  Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu> February 1999.
+ * 
+ *  Force Centaur C6 processors to report MTRR capability.
+ *  Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999.
+ *
+ *  Intel Mobile Pentium II detection fix. Sean Gilley, June 1999.
+ *
+ *  IDT Winchip tweaks, misc clean ups.
+ *  Dave Jones <davej@suse.de>, August 1999
+ *
+ *  Better detection of Centaur/IDT WinChip models.
+ *  Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999.
+ *
+ *  Cleaned up cache-detection code
+ *  Dave Jones <davej@suse.de>, October 1999
+ *
+ *  Added proper L2 cache detection for Coppermine
+ *  Dragan Stancevic <visitor@valinux.com>, October 1999
+ *
+ *  Added the original array for capability flags but forgot to credit 
+ *  myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff
+ *  Jauder Ho <jauderho@carumba.com>, January 2000
+ *
+ *  Detection for Celeron coppermine, identify_cpu() overhauled,
+ *  and a few other clean ups.
+ *  Dave Jones <davej@suse.de>, April 2000
+ *
+ *  Pentium III FXSR, SSE support
+ *  General FPU state handling cleanups
+ *  Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ *  Added proper Cascades CPU and L2 cache detection for Cascades
+ *  and 8-way type cache happy bunch from Intel:^)
+ *  Dragan Stancevic <visitor@valinux.com>, May 2000 
+ *
+ *  Forward port AMD Duron errata T13 from 2.2.17pre
+ *  Dave Jones <davej@suse.de>, August 2000
+ *
+ *  Forward port lots of fixes/improvements from 2.2.18pre
+ *  Cyrix III, Pentium IV support.
+ *  Dave Jones <davej@suse.de>, October 2000
+ *
+ *  Massive cleanup of CPU detection and bug handling;
+ *  Transmeta CPU detection,
+ *  H. Peter Anvin <hpa@zytor.com>, November 2000
+ *
+ *  VIA C3 Support.
+ *  Dave Jones <davej@suse.de>, March 2001
+ *
+ *  AMD Athlon/Duron/Thunderbird bluesmoke support.
+ *  Dave Jones <davej@suse.de>, April 2001.
+ *
+ *  CacheSize bug workaround updates for AMD, Intel & VIA Cyrix.
+ *  Dave Jones <davej@suse.de>, September, October 2001.
+ *
+ */
+
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
new file mode 100644
index 00000000000..ebd5d8247fa
--- /dev/null
+++ b/arch/i386/kernel/cpu/common.c
@@ -0,0 +1,634 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#endif
+
+#include "cpu.h"
+
+DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
+
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
+static int cachesize_override __initdata = -1;
+static int disable_x86_fxsr __initdata = 0;
+static int disable_x86_serial_nr __initdata = 1;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern void mcheck_init(struct cpuinfo_x86 *c);
+
+extern int disable_pse;
+
+static void default_init(struct cpuinfo_x86 * c)
+{
+	/* Not much we can do here... */
+	/* Check if at least it has cpuid */
+	if (c->cpuid_level == -1) {
+		/* No cpuid. It must be an ancient CPU */
+		if (c->x86 == 4)
+			strcpy(c->x86_model_id, "486");
+		else if (c->x86 == 3)
+			strcpy(c->x86_model_id, "386");
+	}
+}
+
+static struct cpu_dev default_cpu = {
+	.c_init	= default_init,
+};
+static struct cpu_dev * this_cpu = &default_cpu;
+
+static int __init cachesize_setup(char *str)
+{
+	get_option (&str, &cachesize_override);
+	return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+int __init get_model_name(struct cpuinfo_x86 *c)
+{
+	unsigned int *v;
+	char *p, *q;
+
+	if (cpuid_eax(0x80000000) < 0x80000004)
+		return 0;
+
+	v = (unsigned int *) c->x86_model_id;
+	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+	c->x86_model_id[48] = 0;
+
+	/* Intel chips right-justify this string for some dumb reason;
+	   undo that brain damage */
+	p = q = &c->x86_model_id[0];
+	while ( *p == ' ' )
+	     p++;
+	if ( p != q ) {
+	     while ( *p )
+		  *q++ = *p++;
+	     while ( q <= &c->x86_model_id[48] )
+		  *q++ = '\0';	/* Zero-pad the rest */
+	}
+
+	return 1;
+}
+
+
+void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+	unsigned int n, dummy, ecx, edx, l2size;
+
+	n = cpuid_eax(0x80000000);
+
+	if (n >= 0x80000005) {
+		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+		c->x86_cache_size=(ecx>>24)+(edx>>24);	
+	}
+
+	if (n < 0x80000006)	/* Some chips just has a large L1. */
+		return;
+
+	ecx = cpuid_ecx(0x80000006);
+	l2size = ecx >> 16;
+	
+	/* do processor-specific cache resizing */
+	if (this_cpu->c_size_cache)
+		l2size = this_cpu->c_size_cache(c,l2size);
+
+	/* Allow user to override all this if necessary. */
+	if (cachesize_override != -1)
+		l2size = cachesize_override;
+
+	if ( l2size == 0 )
+		return;		/* Again, no L2 cache is possible */
+
+	c->x86_cache_size = l2size;
+
+	printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+	       l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+
+/* Look up CPU names by table lookup. */
+static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+{
+	struct cpu_model_info *info;
+
+	if ( c->x86_model >= 16 )
+		return NULL;	/* Range check */
+
+	if (!this_cpu)
+		return NULL;
+
+	info = this_cpu->c_models;
+
+	while (info && info->family) {
+		if (info->family == c->x86)
+			return info->model_names[c->x86_model];
+		info++;
+	}
+	return NULL;		/* Not found */
+}
+
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+	char *v = c->x86_vendor_id;
+	int i;
+
+	for (i = 0; i < X86_VENDOR_NUM; i++) {
+		if (cpu_devs[i]) {
+			if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+			    (cpu_devs[i]->c_ident[1] && 
+			     !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+				c->x86_vendor = i;
+				if (!early)
+					this_cpu = cpu_devs[i];
+				break;
+			}
+		}
+	}
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+	disable_x86_fxsr = 1;
+	return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+	u32 f1, f2;
+
+	asm("pushfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "movl %0,%1\n\t"
+	    "xorl %2,%0\n\t"
+	    "pushl %0\n\t"
+	    "popfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "popfl\n\t"
+	    : "=&r" (f1), "=&r" (f2)
+	    : "ir" (flag));
+
+	return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+	return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+/* Do minimum CPU detection early.
+   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+   The others are not touched to avoid unwanted side effects. */
+static void __init early_cpu_detect(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	c->x86_cache_alignment = 32;
+
+	if (!have_cpuid_p())
+		return;
+
+	/* Get vendor name */
+	cpuid(0x00000000, &c->cpuid_level,
+	      (int *)&c->x86_vendor_id[0],
+	      (int *)&c->x86_vendor_id[8],
+	      (int *)&c->x86_vendor_id[4]);
+
+	get_cpu_vendor(c, 1);
+
+	c->x86 = 4;
+	if (c->cpuid_level >= 0x00000001) {
+		u32 junk, tfms, cap0, misc;
+		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+		c->x86 = (tfms >> 8) & 15;
+		c->x86_model = (tfms >> 4) & 15;
+		if (c->x86 == 0xf) {
+			c->x86 += (tfms >> 20) & 0xff;
+			c->x86_model += ((tfms >> 16) & 0xF) << 4;
+		}
+		c->x86_mask = tfms & 15;
+		if (cap0 & (1<<19))
+			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+	}
+
+	early_intel_workaround(c);
+}
+
+void __init generic_identify(struct cpuinfo_x86 * c)
+{
+	u32 tfms, xlvl;
+	int junk;
+
+	if (have_cpuid_p()) {
+		/* Get vendor name */
+		cpuid(0x00000000, &c->cpuid_level,
+		      (int *)&c->x86_vendor_id[0],
+		      (int *)&c->x86_vendor_id[8],
+		      (int *)&c->x86_vendor_id[4]);
+		
+		get_cpu_vendor(c, 0);
+		/* Initialize the standard set of capabilities */
+		/* Note that the vendor-specific code below might override */
+	
+		/* Intel-defined flags: level 0x00000001 */
+		if ( c->cpuid_level >= 0x00000001 ) {
+			u32 capability, excap;
+			cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+			c->x86_capability[0] = capability;
+			c->x86_capability[4] = excap;
+			c->x86 = (tfms >> 8) & 15;
+			c->x86_model = (tfms >> 4) & 15;
+			if (c->x86 == 0xf) {
+				c->x86 += (tfms >> 20) & 0xff;
+				c->x86_model += ((tfms >> 16) & 0xF) << 4;
+			} 
+			c->x86_mask = tfms & 15;
+		} else {
+			/* Have CPUID level 0 only - unheard of */
+			c->x86 = 4;
+		}
+
+		/* AMD-defined flags: level 0x80000001 */
+		xlvl = cpuid_eax(0x80000000);
+		if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+			if ( xlvl >= 0x80000001 ) {
+				c->x86_capability[1] = cpuid_edx(0x80000001);
+				c->x86_capability[6] = cpuid_ecx(0x80000001);
+			}
+			if ( xlvl >= 0x80000004 )
+				get_model_name(c); /* Default name */
+		}
+	}
+}
+
+static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+		/* Disable processor serial number */
+		unsigned long lo,hi;
+		rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		lo |= 0x200000;
+		wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		printk(KERN_NOTICE "CPU serial number disabled.\n");
+		clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+		/* Disabling the serial number may affect the cpuid level */
+		c->cpuid_level = cpuid_eax(0);
+	}
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+	disable_x86_serial_nr = 0;
+	return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+	int i;
+
+	c->loops_per_jiffy = loops_per_jiffy;
+	c->x86_cache_size = -1;
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+	c->cpuid_level = -1;	/* CPUID not detected */
+	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
+	c->x86_vendor_id[0] = '\0'; /* Unset */
+	c->x86_model_id[0] = '\0';  /* Unset */
+	c->x86_num_cores = 1;
+	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+	if (!have_cpuid_p()) {
+		/* First of all, decide if this is a 486 or higher */
+		/* It's a 486 if we can modify the AC flag */
+		if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+			c->x86 = 4;
+		else
+			c->x86 = 3;
+	}
+
+	generic_identify(c);
+
+	printk(KERN_DEBUG "CPU: After generic identify, caps:");
+	for (i = 0; i < NCAPINTS; i++)
+		printk(" %08lx", c->x86_capability[i]);
+	printk("\n");
+
+	if (this_cpu->c_identify) {
+		this_cpu->c_identify(c);
+
+		printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+		for (i = 0; i < NCAPINTS; i++)
+			printk(" %08lx", c->x86_capability[i]);
+		printk("\n");
+	}
+
+	/*
+	 * Vendor-specific initialization.  In this section we
+	 * canonicalize the feature flags, meaning if there are
+	 * features a certain CPU supports which CPUID doesn't
+	 * tell us, CPUID claiming incorrect flags, or other bugs,
+	 * we handle them here.
+	 *
+	 * At the end of this section, c->x86_capability better
+	 * indicate the features this CPU genuinely supports!
+	 */
+	if (this_cpu->c_init)
+		this_cpu->c_init(c);
+
+	/* Disable the PN if appropriate */
+	squash_the_stupid_serial_number(c);
+
+	/*
+	 * The vendor-specific functions might have changed features.  Now
+	 * we do "generic changes."
+	 */
+
+	/* TSC disabled? */
+	if ( tsc_disable )
+		clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+	/* FXSR disabled? */
+	if (disable_x86_fxsr) {
+		clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+		clear_bit(X86_FEATURE_XMM, c->x86_capability);
+	}
+
+	if (disable_pse)
+		clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+	/* If the model name is still unset, do table lookup. */
+	if ( !c->x86_model_id[0] ) {
+		char *p;
+		p = table_lookup_model(c);
+		if ( p )
+			strcpy(c->x86_model_id, p);
+		else
+			/* Last resort... */
+			sprintf(c->x86_model_id, "%02x/%02x",
+				c->x86_vendor, c->x86_model);
+	}
+
+	/* Now the feature flags better reflect actual CPU features! */
+
+	printk(KERN_DEBUG "CPU: After all inits, caps:");
+	for (i = 0; i < NCAPINTS; i++)
+		printk(" %08lx", c->x86_capability[i]);
+	printk("\n");
+
+	/*
+	 * On SMP, boot_cpu_data holds the common feature set between
+	 * all CPUs; so make sure that we indicate which features are
+	 * common between the CPUs.  The first time this routine gets
+	 * executed, c == &boot_cpu_data.
+	 */
+	if ( c != &boot_cpu_data ) {
+		/* AND the already accumulated flags with these */
+		for ( i = 0 ; i < NCAPINTS ; i++ )
+			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+	}
+
+	/* Init Machine Check Exception if available. */
+#ifdef CONFIG_X86_MCE
+	mcheck_init(c);
+#endif
+}
+
+#ifdef CONFIG_X86_HT
+void __init detect_ht(struct cpuinfo_x86 *c)
+{
+	u32 	eax, ebx, ecx, edx;
+	int 	index_lsb, index_msb, tmp;
+	int 	cpu = smp_processor_id();
+
+	if (!cpu_has(c, X86_FEATURE_HT))
+		return;
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+	smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+	if (smp_num_siblings == 1) {
+		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+	} else if (smp_num_siblings > 1 ) {
+		index_lsb = 0;
+		index_msb = 31;
+
+		if (smp_num_siblings > NR_CPUS) {
+			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+			smp_num_siblings = 1;
+			return;
+		}
+		tmp = smp_num_siblings;
+		while ((tmp & 1) == 0) {
+			tmp >>=1 ;
+			index_lsb++;
+		}
+		tmp = smp_num_siblings;
+		while ((tmp & 0x80000000 ) == 0) {
+			tmp <<=1 ;
+			index_msb--;
+		}
+		if (index_lsb != index_msb )
+			index_msb++;
+		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		       phys_proc_id[cpu]);
+	}
+}
+#endif
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+	char *vendor = NULL;
+
+	if (c->x86_vendor < X86_VENDOR_NUM)
+		vendor = this_cpu->c_vendor;
+	else if (c->cpuid_level >= 0)
+		vendor = c->x86_vendor_id;
+
+	if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+		printk("%s ", vendor);
+
+	if (!c->x86_model_id[0])
+		printk("%d86", c->x86);
+	else
+		printk("%s", c->x86_model_id);
+
+	if (c->x86_mask || c->cpuid_level >= 0) 
+		printk(" stepping %02x\n", c->x86_mask);
+	else
+		printk("\n");
+}
+
+cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int rise_init_cpu(void);
+extern int nexgen_init_cpu(void);
+extern int umc_init_cpu(void);
+
+void __init early_cpu_init(void)
+{
+	intel_cpu_init();
+	cyrix_init_cpu();
+	nsc_init_cpu();
+	amd_init_cpu();
+	centaur_init_cpu();
+	transmeta_init_cpu();
+	rise_init_cpu();
+	nexgen_init_cpu();
+	umc_init_cpu();
+	early_cpu_detect();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* pse is not compatible with on-the-fly unmapping,
+	 * disable it even if the cpus claim to support it.
+	 */
+	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+	disable_pse = 1;
+#endif
+}
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __init cpu_init (void)
+{
+	int cpu = smp_processor_id();
+	struct tss_struct * t = &per_cpu(init_tss, cpu);
+	struct thread_struct *thread = &current->thread;
+	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+
+	if (cpu_test_and_set(cpu, cpu_initialized)) {
+		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+		for (;;) local_irq_enable();
+	}
+	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	if (tsc_disable && cpu_has_tsc) {
+		printk(KERN_NOTICE "Disabling TSC...\n");
+		/**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+		set_in_cr4(X86_CR4_TSD);
+	}
+
+	/*
+	 * Initialize the per-CPU GDT with the boot GDT,
+	 * and set up the GDT descriptor:
+	 */
+	memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
+	       GDT_SIZE);
+
+	/* Set up GDT entry for 16bit stack */
+	*(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
+		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+		(CPU_16BIT_STACK_SIZE - 1);
+
+	cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
+	cpu_gdt_descr[cpu].address =
+	    (unsigned long)&per_cpu(cpu_gdt_table, cpu);
+
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
+		GDT_ENTRY_TLS_ENTRIES * 8);
+
+	__asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
+	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+
+	/*
+	 * Delete NT
+	 */
+	__asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+	/*
+	 * Set up and load the per-CPU TSS and LDT
+	 */
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	if (current->mm)
+		BUG();
+	enter_lazy_tlb(&init_mm, current);
+
+	load_esp0(t, thread);
+	set_tss_desc(cpu,t);
+	load_TR_desc();
+	load_LDT(&init_mm.context);
+
+	/* Set up doublefault TSS pointer in the GDT */
+	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+
+	/* Clear %fs and %gs. */
+	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+
+	/* Clear all 6 debug registers: */
+
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+
+	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+
+#undef CD
+
+	/*
+	 * Force FPU initialization:
+	 */
+	current_thread_info()->status = 0;
+	clear_used_math();
+	mxcsr_feature_mask_init();
+}
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h
new file mode 100644
index 00000000000..5a1d4f163e8
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpu.h
@@ -0,0 +1,30 @@
+
+struct cpu_model_info {
+	int vendor;
+	int family;
+	char *model_names[16];
+};
+
+/* attempt to consolidate cpu attributes */
+struct cpu_dev {
+	char	* c_vendor;
+
+	/* some have two possibilities for cpuid string */
+	char	* c_ident[2];	
+
+	struct		cpu_model_info c_models[4];
+
+	void		(*c_init)(struct cpuinfo_x86 * c);
+	void		(*c_identify)(struct cpuinfo_x86 * c);
+	unsigned int	(*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
+};
+
+extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
+
+extern int get_model_name(struct cpuinfo_x86 *c);
+extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void generic_identify(struct cpuinfo_x86 * c);
+
+extern void early_intel_workaround(struct cpuinfo_x86 *c);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
new file mode 100644
index 00000000000..f25ffd74235
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -0,0 +1,231 @@
+#
+# CPU Frequency scaling
+#
+
+menu "CPU Frequency scaling"
+
+source "drivers/cpufreq/Kconfig"
+
+if CPU_FREQ
+
+comment "CPUFreq processor drivers"
+
+config X86_ACPI_CPUFREQ
+	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	  This driver adds a CPUFreq driver which utilizes the ACPI
+	  Processor Performance States.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config ELAN_CPUFREQ
+	tristate "AMD Elan"
+	select CPU_FREQ_TABLE
+	depends on X86_ELAN
+	---help---
+	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
+	  processors.
+
+	  You need to specify the processor maximum speed as boot
+	  parameter: elanfreq=maxspeed (in kHz) or as module
+	  parameter "max_freq".
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K6
+	tristate "AMD Mobile K6-2/K6-3 PowerNow!"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
+	  AMD K6-3+ processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7
+	tristate "AMD Mobile Athlon/Duron PowerNow!"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for mobile AMD K7 mobile processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7_ACPI
+	bool
+	depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
+	depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
+	default y
+
+config X86_POWERNOW_K8
+	tristate "AMD Opteron/Athlon64 PowerNow!"
+	select CPU_FREQ_TABLE
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K8_ACPI
+	bool
+	depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
+	depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
+	default y
+
+config X86_GX_SUSPMOD
+	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
+	help
+	 This add the CPUFreq driver for NatSemi Geode processors which
+	 support suspend modulation.
+
+	 For details, take a look at <file:Documentation/cpu-freq/>.
+
+	 If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO
+	tristate "Intel Enhanced SpeedStep"
+	select CPU_FREQ_TABLE
+	select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI)
+	help
+	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
+	  mobile CPUs.  This means Intel Pentium M (Centrino) CPUs. However,
+	  you also need to say Y to "Use ACPI tables to decode..." below
+	  [which might imply enabling ACPI] if you want to use this driver
+	  on non-Banias CPUs.
+	  
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+	  
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO_ACPI
+	bool "Use ACPI tables to decode valid frequency/voltage pairs"
+	depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
+	depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
+	default y
+	help
+	  Use primarily the information provided in the BIOS ACPI tables
+	  to determine valid CPU frequency and voltage pairings. It is
+	  required for the driver to work on non-Banias CPUs.
+
+	  If in doubt, say Y.
+
+config X86_SPEEDSTEP_CENTRINO_TABLE
+	bool "Built-in tables for Banias CPUs"
+	depends on X86_SPEEDSTEP_CENTRINO
+	default y
+	help
+	  Use built-in tables for Banias CPUs if ACPI encoding
+	  is not available.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_ICH
+	tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
+	  mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, 
+	  ICH3 or ICH4 southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_SMI
+	tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
+	select CPU_FREQ_TABLE
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin)  
+	  on systems which have an Intel 440BX/ZX/MX southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_P4_CLOCKMOD
+	tristate "Intel Pentium 4 clock modulation"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for Intel Pentium 4 / XEON
+	  processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_CPUFREQ_NFORCE2
+	tristate "nVidia nForce2 FSB changing"
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for FSB changing on nVidia nForce2
+	  platforms.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGRUN
+	tristate "Transmeta LongRun"
+	help
+	  This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
+	  which support LongRun.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGHAUL
+	tristate "VIA Cyrix III Longhaul"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for VIA Samuel/CyrixIII, 
+	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T 
+	  processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+comment "shared options"
+
+config X86_ACPI_CPUFREQ_PROC_INTF
+        bool "/proc/acpi/processor/../performance interface (deprecated)"
+	depends on PROC_FS
+	depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
+	help
+	  This enables the deprecated /proc/acpi/processor/../performance 
+	  interface. While it is helpful for debugging, the generic,
+	  cross-architecture cpufreq interfaces should be used.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_LIB
+	tristate
+	default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD
+
+config X86_SPEEDSTEP_RELAXED_CAP_CHECK
+	bool "Relaxed speedstep capability checks"
+	depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
+	help
+	  Don't perform all checks for a speedstep capable system which would 
+	  normally be done. Some ancient or strange systems, though speedstep 
+	  capable, don't always indicate that they are speedstep capable. This 
+	  option lets the probing code bypass some of those checks if the
+	  parameter "relaxed_check=1" is passed to the module.
+
+endif	# CPU_FREQ
+
+endmenu
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
new file mode 100644
index 00000000000..a922e97aeed
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/Makefile
@@ -0,0 +1,14 @@
+obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
+obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
+obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
+obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
+obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
+obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
+obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
+obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
+obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
+obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
new file mode 100644
index 00000000000..963e17aa205
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,537 @@
+/*
+ * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $)
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+
+#include "speedstep-est-common.h"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+
+MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+
+struct cpufreq_acpi_io {
+	struct acpi_processor_performance	acpi_data;
+	struct cpufreq_frequency_table		*freq_table;
+	unsigned int				resume;
+};
+
+static struct cpufreq_acpi_io	*acpi_io_data[NR_CPUS];
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+static int
+acpi_processor_write_port(
+	u16	port,
+	u8	bit_width,
+	u32	value)
+{
+	if (bit_width <= 8) {
+		outb(value, port);
+	} else if (bit_width <= 16) {
+		outw(value, port);
+	} else if (bit_width <= 32) {
+		outl(value, port);
+	} else {
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int
+acpi_processor_read_port(
+	u16	port,
+	u8	bit_width,
+	u32	*ret)
+{
+	*ret = 0;
+	if (bit_width <= 8) {
+		*ret = inb(port);
+	} else if (bit_width <= 16) {
+		*ret = inw(port);
+	} else if (bit_width <= 32) {
+		*ret = inl(port);
+	} else {
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int
+acpi_processor_set_performance (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu,
+	int			state)
+{
+	u16			port = 0;
+	u8			bit_width = 0;
+	int			ret = 0;
+	u32			value = 0;
+	int			i = 0;
+	struct cpufreq_freqs    cpufreq_freqs;
+	cpumask_t		saved_mask;
+	int			retval;
+
+	dprintk("acpi_processor_set_performance\n");
+
+	/*
+	 * TBD: Use something other than set_cpus_allowed.
+	 * As set_cpus_allowed is a bit racy, 
+	 * with any other set_cpus_allowed for this process.
+	 */
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu) {
+		return (-EAGAIN);
+	}
+	
+	if (state == data->acpi_data.state) {
+		if (unlikely(data->resume)) {
+			dprintk("Called after resume, resetting to P%d\n", state);
+			data->resume = 0;
+		} else {
+			dprintk("Already at target state (P%d)\n", state);
+			retval = 0;
+			goto migrate_end;
+		}
+	}
+
+	dprintk("Transitioning from P%d to P%d\n",
+		data->acpi_data.state, state);
+
+	/* cpufreq frequency struct */
+	cpufreq_freqs.cpu = cpu;
+	cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
+	cpufreq_freqs.new = data->freq_table[state].frequency;
+
+	/* notify cpufreq */
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+
+	/*
+	 * First we write the target state's 'control' value to the
+	 * control_register.
+	 */
+
+	port = data->acpi_data.control_register.address;
+	bit_width = data->acpi_data.control_register.bit_width;
+	value = (u32) data->acpi_data.states[state].control;
+
+	dprintk("Writing 0x%08x to port 0x%04x\n", value, port);
+
+	ret = acpi_processor_write_port(port, bit_width, value);
+	if (ret) {
+		dprintk("Invalid port width 0x%04x\n", bit_width);
+		retval = ret;
+		goto migrate_end;
+	}
+
+	/*
+	 * Then we read the 'status_register' and compare the value with the
+	 * target state's 'status' to make sure the transition was successful.
+	 * Note that we'll poll for up to 1ms (100 cycles of 10us) before
+	 * giving up.
+	 */
+
+	port = data->acpi_data.status_register.address;
+	bit_width = data->acpi_data.status_register.bit_width;
+
+	dprintk("Looking for 0x%08x from port 0x%04x\n",
+		(u32) data->acpi_data.states[state].status, port);
+
+	for (i=0; i<100; i++) {
+		ret = acpi_processor_read_port(port, bit_width, &value);
+		if (ret) {	
+			dprintk("Invalid port width 0x%04x\n", bit_width);
+			retval = ret;
+			goto migrate_end;
+		}
+		if (value == (u32) data->acpi_data.states[state].status)
+			break;
+		udelay(10);
+	}
+
+	/* notify cpufreq */
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+
+	if (value != (u32) data->acpi_data.states[state].status) {
+		unsigned int tmp = cpufreq_freqs.new;
+		cpufreq_freqs.new = cpufreq_freqs.old;
+		cpufreq_freqs.old = tmp;
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+		printk(KERN_WARNING "acpi-cpufreq: Transition failed\n");
+		retval = -ENODEV;
+		goto migrate_end;
+	}
+
+	dprintk("Transition successful after %d microseconds\n", i * 10);
+
+	data->acpi_data.state = state;
+
+	retval = 0;
+migrate_end:
+	set_cpus_allowed(current, saved_mask);
+	return (retval);
+}
+
+
+static int
+acpi_cpufreq_target (
+	struct cpufreq_policy   *policy,
+	unsigned int target_freq,
+	unsigned int relation)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+	unsigned int next_state = 0;
+	unsigned int result = 0;
+
+	dprintk("acpi_cpufreq_setpolicy\n");
+
+	result = cpufreq_frequency_table_target(policy,
+			data->freq_table,
+			target_freq,
+			relation,
+			&next_state);
+	if (result)
+		return (result);
+
+	result = acpi_processor_set_performance (data, policy->cpu, next_state);
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_verify (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int result = 0;
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_verify\n");
+
+	result = cpufreq_frequency_table_verify(policy, 
+			data->freq_table);
+
+	return (result);
+}
+
+
+static unsigned long
+acpi_cpufreq_guess_freq (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu)
+{
+	if (cpu_khz) {
+		/* search the closest match to cpu_khz */
+		unsigned int i;
+		unsigned long freq;
+		unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000;
+
+		for (i=0; i < (data->acpi_data.state_count - 1); i++) {
+			freq = freqn;
+			freqn = data->acpi_data.states[i+1].core_frequency * 1000;
+			if ((2 * cpu_khz) > (freqn + freq)) {
+				data->acpi_data.state = i;
+				return (freq);
+			}
+		}
+		data->acpi_data.state = data->acpi_data.state_count - 1;
+		return (freqn);
+	} else
+		/* assume CPU is at P0... */
+		data->acpi_data.state = 0;
+		return data->acpi_data.states[0].core_frequency * 1000;
+	
+}
+
+
+/* 
+ * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities
+ * of this driver
+ * @perf: processor-specific acpi_io_data struct
+ * @cpu: CPU being initialized
+ *
+ * To avoid issues with legacy OSes, some BIOSes require to be informed of
+ * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC 
+ * accordingly, for Enhanced Speedstep. Actual call to _PDC is done in
+ * driver/acpi/processor.c
+ */
+static void 
+acpi_processor_cpu_init_pdc_est(
+		struct acpi_processor_performance *perf, 
+		unsigned int cpu,
+		struct acpi_object_list *obj_list
+		)
+{
+	union acpi_object *obj;
+	u32 *buf;
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	dprintk("acpi_processor_cpu_init_pdc_est\n");
+
+	if (!cpu_has(c, X86_FEATURE_EST))
+		return;
+
+	/* Initialize pdc. It will be used later. */
+	if (!obj_list)
+		return;
+		
+	if (!(obj_list->count && obj_list->pointer))
+		return;
+
+	obj = obj_list->pointer;
+	if ((obj->buffer.length == 12) && obj->buffer.pointer) {
+		buf = (u32 *)obj->buffer.pointer;
+       		buf[0] = ACPI_PDC_REVISION_ID;
+       		buf[1] = 1;
+       		buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
+		perf->pdc = obj_list;
+	}
+	return;
+}
+ 
+
+/* CPU specific PDC initialization */
+static void 
+acpi_processor_cpu_init_pdc(
+		struct acpi_processor_performance *perf, 
+		unsigned int cpu,
+		struct acpi_object_list *obj_list
+		)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	dprintk("acpi_processor_cpu_init_pdc\n");
+	perf->pdc = NULL;
+	if (cpu_has(c, X86_FEATURE_EST))
+		acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list);
+	return;
+}
+
+
+static int
+acpi_cpufreq_cpu_init (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int		i;
+	unsigned int		cpu = policy->cpu;
+	struct cpufreq_acpi_io	*data;
+	unsigned int		result = 0;
+
+	union acpi_object		arg0 = {ACPI_TYPE_BUFFER};
+	u32				arg0_buf[3];
+	struct acpi_object_list 	arg_list = {1, &arg0};
+
+	dprintk("acpi_cpufreq_cpu_init\n");
+	/* setup arg_list for _PDC settings */
+        arg0.buffer.length = 12;
+        arg0.buffer.pointer = (u8 *) arg0_buf;
+
+	data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
+	if (!data)
+		return (-ENOMEM);
+	memset(data, 0, sizeof(struct cpufreq_acpi_io));
+
+	acpi_io_data[cpu] = data;
+
+	acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list);
+	result = acpi_processor_register_performance(&data->acpi_data, cpu);
+	data->acpi_data.pdc = NULL;
+
+	if (result)
+		goto err_free;
+
+	if (is_const_loops_cpu(cpu)) {
+		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
+	}
+
+	/* capability check */
+	if (data->acpi_data.state_count <= 1) {
+		dprintk("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) ||
+	    (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
+		dprintk("Unsupported address space [%d, %d]\n",
+			(u32) (data->acpi_data.control_register.space_id),
+			(u32) (data->acpi_data.status_register.space_id));
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	/* alloc freq_table */
+	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i=0; i<data->acpi_data.state_count; i++) {
+		if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency)
+			policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000;
+	}
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+	/* The current speed is unknown and not detectable by ACPI...  */
+	policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
+
+	/* table init */
+	for (i=0; i<=data->acpi_data.state_count; i++)
+	{
+		data->freq_table[i].index = i;
+		if (i<data->acpi_data.state_count)
+			data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+		else
+			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
+	}
+
+	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+	if (result) {
+		goto err_freqfree;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n",
+	       cpu);
+	for (i = 0; i < data->acpi_data.state_count; i++)
+		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
+			(i == data->acpi_data.state?'*':' '), i,
+			(u32) data->acpi_data.states[i].core_frequency,
+			(u32) data->acpi_data.states[i].power,
+			(u32) data->acpi_data.states[i].transition_latency);
+
+	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+	return (result);
+
+ err_freqfree:
+	kfree(data->freq_table);
+ err_unreg:
+	acpi_processor_unregister_performance(&data->acpi_data, cpu);
+ err_free:
+	kfree(data);
+	acpi_io_data[cpu] = NULL;
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_cpu_exit (
+	struct cpufreq_policy   *policy)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+
+	dprintk("acpi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		cpufreq_frequency_table_put_attr(policy->cpu);
+		acpi_io_data[policy->cpu] = NULL;
+		acpi_processor_unregister_performance(&data->acpi_data, policy->cpu);
+		kfree(data);
+	}
+
+	return (0);
+}
+
+static int
+acpi_cpufreq_resume (
+	struct cpufreq_policy   *policy)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+
+	dprintk("acpi_cpufreq_resume\n");
+
+	data->resume = 1;
+
+	return (0);
+}
+
+
+static struct freq_attr* acpi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+	.verify 	= acpi_cpufreq_verify,
+	.target 	= acpi_cpufreq_target,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.resume		= acpi_cpufreq_resume,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr           = acpi_cpufreq_attr,
+};
+
+
+static int __init
+acpi_cpufreq_init (void)
+{
+	int                     result = 0;
+
+	dprintk("acpi_cpufreq_init\n");
+
+ 	result = cpufreq_register_driver(&acpi_cpufreq_driver);
+	
+	return (result);
+}
+
+
+static void __exit
+acpi_cpufreq_exit (void)
+{
+	dprintk("acpi_cpufreq_exit\n");
+
+	cpufreq_unregister_driver(&acpi_cpufreq_driver);
+
+	return;
+}
+
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
+MODULE_ALIAS("acpi");
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
new file mode 100644
index 00000000000..04a40534520
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -0,0 +1,457 @@
+/*
+ * (C) 2004  Sebastian Witt <se.witt@gmx.net>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#define NFORCE2_XTAL 25
+#define NFORCE2_BOOTFSB 0x48
+#define NFORCE2_PLLENABLE 0xa8
+#define NFORCE2_PLLREG 0xa4
+#define NFORCE2_PLLADR 0xa0
+#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
+
+#define NFORCE2_MIN_FSB 50
+#define NFORCE2_SAFE_DISTANCE 50
+
+/* Delay in ms between FSB changes */
+//#define NFORCE2_DELAY 10
+
+/* nforce2_chipset:
+ * FSB is changed using the chipset
+ */
+static struct pci_dev *nforce2_chipset_dev;
+
+/* fid:
+ * multiplier * 10
+ */
+static int fid = 0;
+
+/* min_fsb, max_fsb:
+ * minimum and maximum FSB (= FSB at boot time) 
+ */
+static int min_fsb = 0;
+static int max_fsb = 0;
+
+MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
+MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
+MODULE_LICENSE("GPL");
+
+module_param(fid, int, 0444);
+module_param(min_fsb, int, 0444);
+
+MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
+MODULE_PARM_DESC(min_fsb,
+                 "Minimum FSB to use, if not defined: current FSB - 50");
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
+
+/*
+ * nforce2_calc_fsb - calculate FSB
+ * @pll: PLL value
+ * 
+ *   Calculates FSB from PLL value
+ */
+static int nforce2_calc_fsb(int pll)
+{
+	unsigned char mul, div;
+
+	mul = (pll >> 8) & 0xff;
+	div = pll & 0xff;
+
+	if (div > 0)
+		return NFORCE2_XTAL * mul / div;
+
+	return 0;
+}
+
+/*
+ * nforce2_calc_pll - calculate PLL value
+ * @fsb: FSB
+ * 
+ *   Calculate PLL value for given FSB
+ */
+static int nforce2_calc_pll(unsigned int fsb)
+{
+	unsigned char xmul, xdiv;
+	unsigned char mul = 0, div = 0;
+	int tried = 0;
+
+	/* Try to calculate multiplier and divider up to 4 times */
+	while (((mul == 0) || (div == 0)) && (tried <= 3)) {
+		for (xdiv = 1; xdiv <= 0x80; xdiv++)
+			for (xmul = 1; xmul <= 0xfe; xmul++)
+				if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
+				    fsb + tried) {
+					mul = xmul;
+					div = xdiv;
+				}
+		tried++;
+	}
+
+	if ((mul == 0) || (div == 0))
+		return -1;
+
+	return NFORCE2_PLL(mul, div);
+}
+
+/*
+ * nforce2_write_pll - write PLL value to chipset
+ * @pll: PLL value
+ * 
+ *   Writes new FSB PLL value to chipset
+ */
+static void nforce2_write_pll(int pll)
+{
+	int temp;
+
+	/* Set the pll addr. to 0x00 */
+	temp = 0x00;
+	pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp);
+
+	/* Now write the value in all 64 registers */
+	for (temp = 0; temp <= 0x3f; temp++) {
+		pci_write_config_dword(nforce2_chipset_dev, 
+                                       NFORCE2_PLLREG, pll);
+	}
+
+	return;
+}
+
+/*
+ * nforce2_fsb_read - Read FSB
+ *
+ *   Read FSB from chipset
+ *   If bootfsb != 0, return FSB at boot-time
+ */
+static unsigned int nforce2_fsb_read(int bootfsb)
+{
+	struct pci_dev *nforce2_sub5;
+	u32 fsb, temp = 0;
+
+	
+	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
+	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+                                      0x01EF,
+                                      PCI_ANY_ID,
+                                      PCI_ANY_ID,
+                                      NULL);
+	
+	if (!nforce2_sub5)
+		return 0;
+
+	pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
+	fsb /= 1000000;
+	
+	/* Check if PLL register is already set */
+	pci_read_config_byte(nforce2_chipset_dev, 
+                             NFORCE2_PLLENABLE, (u8 *)&temp);
+	
+	if(bootfsb || !temp)
+		return fsb;
+		
+	/* Use PLL register FSB value */
+	pci_read_config_dword(nforce2_chipset_dev, 
+                              NFORCE2_PLLREG, &temp);
+	fsb = nforce2_calc_fsb(temp);
+
+	return fsb;
+}
+
+/*
+ * nforce2_set_fsb - set new FSB
+ * @fsb: New FSB
+ * 
+ *   Sets new FSB
+ */
+static int nforce2_set_fsb(unsigned int fsb)
+{
+	u32 pll, temp = 0;
+	unsigned int tfsb;
+	int diff;
+
+	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
+		printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
+		return -EINVAL;
+	}
+	
+	tfsb = nforce2_fsb_read(0);
+	if (!tfsb) {
+		printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
+		return -EINVAL;
+	}
+
+	/* First write? Then set actual value */
+	pci_read_config_byte(nforce2_chipset_dev, 
+                             NFORCE2_PLLENABLE, (u8 *)&temp);
+	if (!temp) {
+		pll = nforce2_calc_pll(tfsb);
+
+		if (pll < 0)
+			return -EINVAL;
+
+		nforce2_write_pll(pll);
+	}
+
+	/* Enable write access */
+	temp = 0x01;
+	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
+
+	diff = tfsb - fsb;
+
+	if (!diff)
+		return 0;
+
+	while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
+		if (diff < 0)
+			tfsb++;
+		else
+			tfsb--;
+
+		/* Calculate the PLL reg. value */
+		if ((pll = nforce2_calc_pll(tfsb)) == -1)
+			return -EINVAL;
+		
+		nforce2_write_pll(pll);
+#ifdef NFORCE2_DELAY
+		mdelay(NFORCE2_DELAY);
+#endif
+	}
+
+	temp = 0x40;
+	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
+
+	return 0;
+}
+
+/**
+ * nforce2_get - get the CPU frequency
+ * @cpu: CPU number
+ * 
+ * Returns the CPU frequency
+ */
+static unsigned int nforce2_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return nforce2_fsb_read(0) * fid * 100;
+}
+
+/**
+ * nforce2_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int nforce2_target(struct cpufreq_policy *policy,
+			  unsigned int target_freq, unsigned int relation)
+{
+//        unsigned long         flags;
+	struct cpufreq_freqs freqs;
+	unsigned int target_fsb;
+
+	if ((target_freq > policy->max) || (target_freq < policy->min))
+		return -EINVAL;
+
+	target_fsb = target_freq / (fid * 100);
+
+	freqs.old = nforce2_get(policy->cpu);
+	freqs.new = target_fsb * fid * 100;
+	freqs.cpu = 0;		/* Only one CPU on nForce2 plattforms */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n",
+	       freqs.old, freqs.new);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Disable IRQs */
+	//local_irq_save(flags);
+
+	if (nforce2_set_fsb(target_fsb) < 0)
+		printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
+                       target_fsb);
+	else
+		dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n",
+                       target_fsb);
+
+	/* Enable IRQs */
+	//local_irq_restore(flags);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+/**
+ * nforce2_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ */
+static int nforce2_verify(struct cpufreq_policy *policy)
+{
+	unsigned int fsb_pol_max;
+
+	fsb_pol_max = policy->max / (fid * 100);
+
+	if (policy->min < (fsb_pol_max * fid * 100))
+		policy->max = (fsb_pol_max + 1) * fid * 100;
+
+	cpufreq_verify_within_limits(policy,
+                                     policy->cpuinfo.min_freq,
+                                     policy->cpuinfo.max_freq);
+	return 0;
+}
+
+static int nforce2_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int fsb;
+	unsigned int rfid;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* Get current FSB */
+	fsb = nforce2_fsb_read(0);
+
+	if (!fsb)
+		return -EIO;
+
+	/* FIX: Get FID from CPU */
+	if (!fid) {
+		if (!cpu_khz) {
+			printk(KERN_WARNING
+			       "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
+			return -ENODEV;
+		}
+
+		fid = cpu_khz / (fsb * 100);
+		rfid = fid % 5;
+
+		if (rfid) {
+			if (rfid > 2)
+				fid += 5 - rfid;
+			else
+				fid -= rfid;
+		}
+	}
+
+	printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
+	       fid / 10, fid % 10);
+	
+	/* Set maximum FSB to FSB at boot time */
+	max_fsb = nforce2_fsb_read(1);
+	
+	if(!max_fsb)
+		return -EIO;
+
+	if (!min_fsb)
+		min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
+
+	if (min_fsb < NFORCE2_MIN_FSB)
+		min_fsb = NFORCE2_MIN_FSB;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = min_fsb * fid * 100;
+	policy->cpuinfo.max_freq = max_fsb * fid * 100;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = nforce2_get(policy->cpu);
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+	return 0;
+}
+
+static int nforce2_cpu_exit(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static struct cpufreq_driver nforce2_driver = {
+	.name = "nforce2",
+	.verify = nforce2_verify,
+	.target = nforce2_target,
+	.get = nforce2_get,
+	.init = nforce2_cpu_init,
+	.exit = nforce2_cpu_exit,
+	.owner = THIS_MODULE,
+};
+
+/**
+ * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
+ *
+ * Detects nForce2 A2 and C1 stepping
+ * 
+ */
+static unsigned int nforce2_detect_chipset(void)
+{
+	u8 revision;
+
+	nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+                                             PCI_DEVICE_ID_NVIDIA_NFORCE2,
+                                             PCI_ANY_ID,
+                                             PCI_ANY_ID,
+                                             NULL);
+
+	if (nforce2_chipset_dev == NULL)
+		return -ENODEV;
+
+	pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision);
+
+	printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
+	       revision);
+	printk(KERN_INFO
+	       "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
+
+	return 0;
+}
+
+/**
+ * nforce2_init - initializes the nForce2 CPUFreq driver
+ *
+ * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init nforce2_init(void)
+{
+	/* TODO: do we need to detect the processor? */
+
+	/* detect chipset */
+	if (nforce2_detect_chipset()) {
+		printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
+		return -ENODEV;
+	}
+
+	return cpufreq_register_driver(&nforce2_driver);
+}
+
+/**
+ * nforce2_exit - unregisters cpufreq module
+ *
+ *   Unregisters nForce2 FSB change support.
+ */
+static void __exit nforce2_exit(void)
+{
+	cpufreq_unregister_driver(&nforce2_driver);
+}
+
+module_init(nforce2_init);
+module_exit(nforce2_exit);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c
new file mode 100644
index 00000000000..3f7caa4ae6d
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c
@@ -0,0 +1,312 @@
+/*
+ * 	elanfreq: 	cpufreq driver for the AMD ELAN family
+ *
+ *	(c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
+ *
+ *	Parts of this code are (c) Sven Geggus <sven@geggus.net> 
+ *
+ *      All Rights Reserved. 
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version. 
+ *
+ *	2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+#define REG_CSCIR 0x22 		/* Chip Setup and Control Index Register    */
+#define REG_CSCDR 0x23		/* Chip Setup and Control Data  Register    */
+
+/* Module parameter */
+static int max_freq;
+
+struct s_elan_multiplier {
+	int clock;		/* frequency in kHz                         */
+	int val40h;		/* PMU Force Mode register                  */
+	int val80h;		/* CPU Clock Speed Register                 */
+};
+
+/*
+ * It is important that the frequencies 
+ * are listed in ascending order here!
+ */
+struct s_elan_multiplier elan_multiplier[] = {
+	{1000,	0x02,	0x18},
+	{2000,	0x02,	0x10},
+	{4000,	0x02,	0x08},
+	{8000,	0x00,	0x00},
+	{16000,	0x00,	0x02},
+	{33000,	0x00,	0x04},
+	{66000,	0x01,	0x04},
+	{99000,	0x01,	0x05}
+};
+
+static struct cpufreq_frequency_table elanfreq_table[] = {
+	{0,	1000},
+	{1,	2000},
+	{2,	4000},
+	{3,	8000},
+	{4,	16000},
+	{5,	33000},
+	{6,	66000},
+	{7,	99000},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+
+/**
+ *	elanfreq_get_cpu_frequency: determine current cpu speed
+ *
+ *	Finds out at which frequency the CPU of the Elan SOC runs
+ *	at the moment. Frequencies from 1 to 33 MHz are generated 
+ *	the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
+ *	and have the rest of the chip running with 33 MHz. 
+ */
+
+static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
+{
+        u8 clockspeed_reg;    /* Clock Speed Register */
+	
+	local_irq_disable();
+        outb_p(0x80,REG_CSCIR);
+        clockspeed_reg = inb_p(REG_CSCDR);
+	local_irq_enable();
+
+        if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; }
+
+        /* Are we in CPU clock multiplied mode (66/99 MHz)? */
+        if ((clockspeed_reg & 0xE0) == 0xC0) {
+                if ((clockspeed_reg & 0x01) == 0) {
+			return 66000;
+		} else {
+			return 99000;             
+		}
+        }
+
+	/* 33 MHz is not 32 MHz... */
+	if ((clockspeed_reg & 0xE0)==0xA0)
+		return 33000;
+
+        return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
+}
+
+
+/**
+ *      elanfreq_set_cpu_frequency: Change the CPU core frequency
+ * 	@cpu: cpu number
+ *	@freq: frequency in kHz
+ *
+ *      This function takes a frequency value and changes the CPU frequency 
+ *	according to this. Note that the frequency has to be checked by
+ *	elanfreq_validatespeed() for correctness!
+ *	
+ *	There is no return value. 
+ */
+
+static void elanfreq_set_cpu_state (unsigned int state) {
+
+	struct cpufreq_freqs    freqs;
+
+	freqs.old = elanfreq_get_cpu_frequency(0);
+	freqs.new = elan_multiplier[state].clock;
+	freqs.cpu = 0; /* elanfreq.c is UP only driver */
+	
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock);
+
+
+	/* 
+	 * Access to the Elan's internal registers is indexed via    
+	 * 0x22: Chip Setup & Control Register Index Register (CSCI) 
+	 * 0x23: Chip Setup & Control Register Data  Register (CSCD) 
+	 *
+	 */
+
+	/* 
+	 * 0x40 is the Power Management Unit's Force Mode Register. 
+	 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
+	 */
+
+	local_irq_disable();
+	outb_p(0x40,REG_CSCIR); 	/* Disable hyperspeed mode          */
+	outb_p(0x00,REG_CSCDR);
+	local_irq_enable();		/* wait till internal pipelines and */
+	udelay(1000);			/* buffers have cleaned up          */
+
+	local_irq_disable();
+
+	/* now, set the CPU clock speed register (0x80) */
+	outb_p(0x80,REG_CSCIR);
+	outb_p(elan_multiplier[state].val80h,REG_CSCDR);
+
+	/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
+	outb_p(0x40,REG_CSCIR);
+	outb_p(elan_multiplier[state].val40h,REG_CSCDR);
+	udelay(10000);
+	local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+
+/**
+ *	elanfreq_validatespeed: test if frequency range is valid
+ *      @policy: the policy to validate
+ *
+ *	This function checks if a given frequency range in kHz is valid 
+ *      for the hardware supported by the driver. 
+ */
+
+static int elanfreq_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
+}
+
+static int elanfreq_target (struct cpufreq_policy *policy, 
+			    unsigned int target_freq, 
+			    unsigned int relation)
+{
+	unsigned int    newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	elanfreq_set_cpu_state(newstate);
+
+	return 0;
+}
+
+
+/*
+ *	Module init and exit code
+ */
+
+static int elanfreq_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	unsigned int i;
+	int result;
+
+	/* capability check */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+	    (c->x86 != 4) || (c->x86_model!=10))
+		return -ENODEV;
+
+	/* max freq */
+	if (!max_freq)
+		max_freq = elanfreq_get_cpu_frequency(0);
+
+	/* table init */
+ 	for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if (elanfreq_table[i].frequency > max_freq)
+			elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = elanfreq_get_cpu_frequency(0);
+
+	result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
+	if (result)
+		return (result);
+
+        cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
+
+	return 0;
+}
+
+
+static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+
+#ifndef MODULE
+/**
+ * elanfreq_setup - elanfreq command line parameter parsing
+ *
+ * elanfreq command line parameter.  Use:
+ *  elanfreq=66000
+ * to set the maximum CPU frequency to 66 MHz. Note that in
+ * case you do not give this boot parameter, the maximum
+ * frequency will fall back to _current_ CPU frequency which
+ * might be lower. If you build this as a module, use the
+ * max_freq module parameter instead.
+ */
+static int __init elanfreq_setup(char *str)
+{
+	max_freq = simple_strtoul(str, &str, 0);
+	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+	return 1;
+}
+__setup("elanfreq=", elanfreq_setup);
+#endif
+
+
+static struct freq_attr* elanfreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver elanfreq_driver = {
+	.get	 	= elanfreq_get_cpu_frequency,
+	.verify 	= elanfreq_verify,
+	.target 	= elanfreq_target,
+	.init		= elanfreq_cpu_init,
+	.exit		= elanfreq_cpu_exit,
+	.name		= "elanfreq",
+	.owner		= THIS_MODULE,
+	.attr		= elanfreq_attr,
+};
+
+
+static int __init elanfreq_init(void) 
+{	
+	struct cpuinfo_x86 *c = cpu_data;
+
+	/* Test if we have the right hardware */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+		(c->x86 != 4) || (c->x86_model!=10))
+	{
+		printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
+                return -ENODEV;
+	}
+	
+	return cpufreq_register_driver(&elanfreq_driver);
+}
+
+
+static void __exit elanfreq_exit(void) 
+{
+	cpufreq_unregister_driver(&elanfreq_driver);
+}
+
+
+module_param (max_freq, int, 0444);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
+
+module_init(elanfreq_init);
+module_exit(elanfreq_exit);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
new file mode 100644
index 00000000000..1a49adb1f4a
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -0,0 +1,502 @@
+/*
+ *	Cyrix MediaGX and NatSemi Geode Suspend Modulation
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Hiroshi Miura   <miura@da-cha.org>
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      version 2 as published by the Free Software Foundation 
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *	
+ * Theoritical note:
+ *
+ *	(see Geode(tm) CS5530 manual (rev.4.1) page.56)
+ *
+ *	CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
+ *	are based on Suspend Moduration.
+ *
+ *	Suspend Modulation works by asserting and de-asserting the SUSP# pin
+ *	to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
+ *	the CPU enters an idle state. GX1 stops its core clock when SUSP# is 
+ *	asserted then power consumption is reduced.
+ *
+ *	Suspend Modulation's OFF/ON duration are configurable 
+ *	with 'Suspend Modulation OFF Count Register'
+ *	and 'Suspend Modulation ON Count Register'.
+ *	These registers are 8bit counters that represent the number of 
+ *	32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
+ *	to the processor.
+ *
+ *	These counters define a ratio which is the effective frequency 
+ * 	of operation of the system.
+ *
+ *			       OFF Count
+ *	F_eff = Fgx * ----------------------
+ *	                OFF Count + ON Count
+ *
+ *	0 <= On Count, Off Count <= 255
+ *
+ *	From these limits, we can get register values 
+ *
+ *	off_duration + on_duration <= MAX_DURATION
+ *	on_duration = off_duration * (stock_freq - freq) / freq
+ *
+ *      off_duration  =  (freq * DURATION) / stock_freq 
+ *      on_duration = DURATION - off_duration 
+ *
+ *
+ *---------------------------------------------------------------------------
+ *
+ * ChangeLog:
+ *  	Dec. 12, 2003	Hiroshi Miura <miura@da-cha.org>
+ *  		- fix on/off register mistake
+ *  		- fix cpu_khz calc when it stops cpu modulation.
+ *
+ *	Dec. 11, 2002 	Hiroshi Miura <miura@da-cha.org>
+ *		- rewrite for Cyrix MediaGX Cx5510/5520 and 
+ *		  NatSemi Geode Cs5530(A).
+ *
+ *	Jul. ??, 2002  Zwane Mwaikambo <zwane@commfireservices.com>
+ *		- cs5530_mod patch for 2.4.19-rc1.
+ *
+ *---------------------------------------------------------------------------
+ *
+ * Todo
+ *	Test on machines with 5510, 5530, 5530A
+ */
+
+/************************************************************************
+ *			Suspend Modulation - Definitions		*
+ ************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h> 
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <asm/processor.h> 
+#include <asm/errno.h>
+
+/* PCI config registers, all at F0 */
+#define PCI_PMER1              0x80    /* power management enable register 1 */
+#define PCI_PMER2              0x81    /* power management enable register 2 */
+#define PCI_PMER3              0x82    /* power management enable register 3 */
+#define PCI_IRQTC              0x8c    /* irq speedup timer counter register:typical 2 to 4ms */
+#define PCI_VIDTC              0x8d    /* video speedup timer counter register: typical 50 to 100ms */
+#define PCI_MODOFF             0x94    /* suspend modulation OFF counter register, 1 = 32us */
+#define PCI_MODON              0x95    /* suspend modulation ON counter register */
+#define PCI_SUSCFG             0x96    /* suspend configuration register */
+
+/* PMER1 bits */
+#define GPM                    (1<<0)  /* global power management */
+#define GIT                    (1<<1)  /* globally enable PM device idle timers */
+#define GTR                    (1<<2)  /* globally enable IO traps */
+#define IRQ_SPDUP              (1<<3)  /* disable clock throttle during interrupt handling */
+#define VID_SPDUP              (1<<4)  /* disable clock throttle during vga video handling */
+
+/* SUSCFG bits */
+#define SUSMOD                 (1<<0)  /* enable/disable suspend modulation */
+/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ 
+#define SMISPDUP               (1<<1)  /* select how SMI re-enable suspend modulation: */
+                                       /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
+#define SUSCFG                 (1<<2)  /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
+/* the belows support only with cs5530A */ 
+#define PWRSVE_ISA             (1<<3)  /* stop ISA clock  */
+#define PWRSVE                 (1<<4)  /* active idle */
+
+struct gxfreq_params {
+	u8 on_duration;
+	u8 off_duration;
+	u8 pci_suscfg;
+	u8 pci_pmer1;
+	u8 pci_pmer2;
+	u8 pci_rev;
+	struct pci_dev *cs55x0;
+};
+
+static struct gxfreq_params *gx_params;
+static int stock_freq;
+
+/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
+static int pci_busclk = 0;
+module_param (pci_busclk, int, 0444);
+
+/* maximum duration for which the cpu may be suspended
+ * (32us * MAX_DURATION). If no parameter is given, this defaults
+ * to 255. 
+ * Note that this leads to a maximum of 8 ms(!) where the CPU clock
+ * is suspended -- processing power is just 0.39% of what it used to be,
+ * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
+static int max_duration = 255;
+module_param (max_duration, int, 0444);
+
+/* For the default policy, we want at least some processing power
+ * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
+ */
+#define POLICY_MIN_DIV 20
+
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
+
+/**
+ *      we can detect a core multipiler from dir0_lsb 
+ *      from GX1 datasheet p.56, 
+ *	   MULT[3:0]:
+ *	   0000 = SYSCLK multiplied by 4 (test only)
+ *	   0001 = SYSCLK multiplied by 10
+ *	   0010 = SYSCLK multiplied by 4
+ *	   0011 = SYSCLK multiplied by 6
+ *	   0100 = SYSCLK multiplied by 9
+ *	   0101 = SYSCLK multiplied by 5
+ *	   0110 = SYSCLK multiplied by 7
+ *	   0111 = SYSCLK multiplied by 8
+ *              of 33.3MHz
+ **/
+static int gx_freq_mult[16] = {
+		4, 10, 4, 6, 9, 5, 7, 8,
+		0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
+/****************************************************************
+ * 	Low Level chipset interface				*
+ ****************************************************************/
+static struct pci_device_id gx_chipset_tbl[] __initdata = {
+        { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
+        { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
+        { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
+        { 0, },
+};
+
+/**
+ *     gx_detect_chipset:
+ *
+ **/
+static __init struct pci_dev *gx_detect_chipset(void)
+{
+	struct pci_dev *gx_pci = NULL;
+
+	/* check if CPU is a MediaGX or a Geode. */
+        if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && 
+	    (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+		dprintk("error: no MediaGX/Geode processor found!\n");
+		return NULL;		
+	}
+
+	/* detect which companion chip is used */
+	while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
+		if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
+			return gx_pci;
+		}
+	}
+
+	dprintk("error: no supported chipset found!\n");
+	return NULL;
+}
+
+/**
+ *      gx_get_cpuspeed:
+ *
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
+ */
+static unsigned int gx_get_cpuspeed(unsigned int cpu)
+{
+	if ((gx_params->pci_suscfg & SUSMOD) == 0) 
+		return stock_freq;
+
+	return (stock_freq * gx_params->off_duration) 
+		/ (gx_params->on_duration + gx_params->off_duration);
+}
+
+/**
+ *      gx_validate_speed:
+ *      determine current cpu speed
+ *       
+**/
+
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
+{
+	unsigned int i;
+	u8 tmp_on, tmp_off;
+	int old_tmp_freq = stock_freq;
+	int tmp_freq;
+
+	*off_duration=1;
+	*on_duration=0;
+
+	for (i=max_duration; i>0; i--) {
+		tmp_off = ((khz * i) / stock_freq) & 0xff; 
+		tmp_on = i - tmp_off;
+		tmp_freq = (stock_freq * tmp_off) / i;
+		/* if this relation is closer to khz, use this. If it's equal,
+		 * prefer it, too - lower latency */
+		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
+			*on_duration = tmp_on;
+			*off_duration = tmp_off;
+			old_tmp_freq = tmp_freq;
+		}
+	}
+
+	return old_tmp_freq;
+}
+
+
+/**
+ * 	gx_set_cpuspeed:
+ *		set cpu speed in khz.
+ **/
+
+static void gx_set_cpuspeed(unsigned int khz)
+{
+        u8 suscfg, pmer1;
+	unsigned int new_khz;
+	unsigned long flags;
+	struct cpufreq_freqs freqs;
+
+
+	freqs.cpu = 0;
+	freqs.old = gx_get_cpuspeed(0);
+
+	new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
+
+	freqs.new = new_khz;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	local_irq_save(flags);
+
+	if (new_khz != stock_freq) {  /* if new khz == 100% of CPU speed, it is special case */
+		switch (gx_params->cs55x0->device) {
+		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
+			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
+			/* FIXME: need to test other values -- Zwane,Miura */
+			pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
+			pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
+			pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
+
+			if (gx_params->pci_rev < 0x10) {   /* CS5530(rev 1.2, 1.3) */
+				suscfg = gx_params->pci_suscfg | SUSMOD;
+			} else {                           /* CS5530A,B.. */
+				suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
+			}
+			break;
+		case PCI_DEVICE_ID_CYRIX_5520:
+		case PCI_DEVICE_ID_CYRIX_5510:
+			suscfg = gx_params->pci_suscfg | SUSMOD;
+			break;
+		default:
+			local_irq_restore(flags);
+			dprintk("fatal: try to set unknown chipset.\n");
+			return;
+		}
+	} else {
+		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
+		gx_params->off_duration = 0;
+		gx_params->on_duration = 0;
+		dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n");
+	}
+
+	pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
+	pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
+
+        pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
+        pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
+
+        local_irq_restore(flags);
+
+	gx_params->pci_suscfg = suscfg;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+        dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+                gx_params->on_duration * 32, gx_params->off_duration * 32);
+	dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); 
+}
+
+/****************************************************************
+ *             High level functions                             *
+ ****************************************************************/
+
+/*
+ *	cpufreq_gx_verify: test if frequency range is valid 
+ *
+ *	This function checks if a given frequency range in kHz is valid 
+ *      for the hardware supported by the driver. 
+ */
+
+static int cpufreq_gx_verify(struct cpufreq_policy *policy)
+{
+	unsigned int tmp_freq = 0;
+	u8 tmp1, tmp2;
+
+        if (!stock_freq || !policy)
+                return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+
+	/* it needs to be assured that at least one supported frequency is
+	 * within policy->min and policy->max. If it is not, policy->max
+	 * needs to be increased until one freuqency is supported.
+	 * policy->min may not be decreased, though. This way we guarantee a 
+	 * specific processing capacity.
+	 */
+	tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
+	if (tmp_freq < policy->min) 
+		tmp_freq += stock_freq / max_duration;
+	policy->min = tmp_freq;
+	if (policy->min > policy->max) 
+		policy->max = tmp_freq;
+	tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
+	if (tmp_freq > policy->max)
+		tmp_freq -= stock_freq / max_duration;
+	policy->max = tmp_freq;
+	if (policy->max < policy->min)
+		policy->max = policy->min;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+	
+	return 0;
+}
+
+/*
+ *      cpufreq_gx_target:  
+ *
+ */
+static int cpufreq_gx_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	u8 tmp1, tmp2;
+	unsigned int tmp_freq;
+
+        if (!stock_freq || !policy)
+                return -EINVAL;
+
+	policy->cpu = 0;
+
+	tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
+	while (tmp_freq < policy->min) {
+		tmp_freq += stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+	while (tmp_freq > policy->max) {
+		tmp_freq -= stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+
+	gx_set_cpuspeed(tmp_freq);
+
+	return 0;
+}
+
+static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int maxfreq, curfreq;
+
+	if (!policy || policy->cpu != 0)
+		return -ENODEV;
+
+	/* determine maximum frequency */
+	if (pci_busclk) {
+		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+	} else if (cpu_khz) {
+		maxfreq = cpu_khz;
+	} else {
+		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+	}
+	stock_freq = maxfreq;
+	curfreq = gx_get_cpuspeed(0);
+
+	dprintk("cpu max frequency is %d.\n", maxfreq);
+	dprintk("cpu current frequency is %dkHz.\n",curfreq);
+
+	/* setup basic struct for cpufreq API */
+	policy->cpu = 0;
+
+	if (max_duration < POLICY_MIN_DIV)
+		policy->min = maxfreq / max_duration;
+	else
+		policy->min = maxfreq / POLICY_MIN_DIV;
+	policy->max = maxfreq;
+	policy->cur = curfreq;
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.min_freq = maxfreq / max_duration;
+	policy->cpuinfo.max_freq = maxfreq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
+	return 0;
+}
+
+/* 
+ * cpufreq_gx_init:
+ *   MediaGX/Geode GX initialize cpufreq driver
+ */
+static struct cpufreq_driver gx_suspmod_driver = {
+	.get		= gx_get_cpuspeed,
+	.verify		= cpufreq_gx_verify,
+	.target		= cpufreq_gx_target,
+	.init		= cpufreq_gx_cpu_init,
+	.name		= "gx-suspmod",
+	.owner		= THIS_MODULE,
+};
+
+static int __init cpufreq_gx_init(void)
+{
+	int ret;
+	struct gxfreq_params *params;
+	struct pci_dev *gx_pci;
+	u32 class_rev;
+
+	/* Test if we have the right hardware */
+	if ((gx_pci = gx_detect_chipset()) == NULL) 
+		return -ENODEV;
+
+	/* check whether module parameters are sane */
+	if (max_duration > 0xff)
+		max_duration = 0xff;
+
+	dprintk("geode suspend modulation available.\n");
+
+	params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
+	if (params == NULL)
+		return -ENOMEM;
+	memset(params, 0, sizeof(struct gxfreq_params));
+
+	params->cs55x0 = gx_pci;
+	gx_params = params;
+
+	/* keep cs55x0 configurations */
+	pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
+	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
+	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
+	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
+	pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
+        pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev);
+	params->pci_rev = class_rev && 0xff;
+
+	if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { 
+		kfree(params);
+		return ret;                   /* register error! */
+	}
+
+	return 0;
+}
+
+static void __exit cpufreq_gx_exit(void)
+{
+	cpufreq_unregister_driver(&gx_suspmod_driver);
+	pci_dev_put(gx_params->cs55x0);
+	kfree(gx_params);
+}
+
+MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE ("GPL");
+
+module_init(cpufreq_gx_init);
+module_exit(cpufreq_gx_exit);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
new file mode 100644
index 00000000000..ab0f9f5aac1
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -0,0 +1,658 @@
+/*
+ *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by VIA.
+ *
+ *  VIA have currently 3 different versions of Longhaul.
+ *  Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
+ *   It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
+ *  Version 2 of longhaul is the same as v1, but adds voltage scaling.
+ *   Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C)
+ *   voltage scaling support has currently been disabled in this driver
+ *   until we have code that gets it right.
+ *  Version 3 of longhaul got renamed to Powersaver and redesigned
+ *   to use the POWERSAVER MSR at 0x110a.
+ *   It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
+ *   It's pretty much the same feature wise to longhaul v2, though
+ *   there is provision for scaling FSB too, but this doesn't work
+ *   too well in practice so we don't even try to use this.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+#include "longhaul.h"
+
+#define PFX "longhaul: "
+
+#define TYPE_LONGHAUL_V1	1
+#define TYPE_LONGHAUL_V2	2
+#define TYPE_POWERSAVER		3
+
+#define	CPU_SAMUEL	1
+#define	CPU_SAMUEL2	2
+#define	CPU_EZRA	3
+#define	CPU_EZRA_T	4
+#define	CPU_NEHEMIAH	5
+
+static int cpu_model;
+static unsigned int numscales=16, numvscales;
+static unsigned int fsb;
+static int minvid, maxvid;
+static unsigned int minmult, maxmult;
+static int can_scale_voltage;
+static int vrmrev;
+
+/* Module parameters */
+static int dont_scale_voltage;
+
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
+
+
+#define __hlt()     __asm__ __volatile__("hlt": : :"memory")
+
+/* Clock ratios multiplied by 10 */
+static int clock_ratio[32];
+static int eblcr_table[32];
+static int voltage_table[32];
+static unsigned int highest_speed, lowest_speed; /* kHz */
+static int longhaul_version;
+static struct cpufreq_frequency_table *longhaul_table;
+
+#ifdef CONFIG_CPU_FREQ_DEBUG
+static char speedbuffer[8];
+
+static char *print_speed(int speed)
+{
+	if (speed > 1000) {
+		if (speed%1000 == 0)
+			sprintf (speedbuffer, "%dGHz", speed/1000);
+		else
+			sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100);
+	} else
+		sprintf (speedbuffer, "%dMHz", speed);
+
+	return speedbuffer;
+}
+#endif
+
+
+static unsigned int calc_speed(int mult)
+{
+	int khz;
+	khz = (mult/10)*fsb;
+	if (mult%10)
+		khz += fsb/2;
+	khz *= 1000;
+	return khz;
+}
+
+
+static int longhaul_get_cpu_mult(void)
+{
+	unsigned long invalue=0,lo, hi;
+
+	rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
+	invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
+	if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
+		if (lo & (1<<27))
+			invalue+=16;
+	}
+	return eblcr_table[invalue];
+}
+
+
+static void do_powersaver(union msr_longhaul *longhaul,
+			unsigned int clock_ratio_index)
+{
+	int version;
+
+	switch (cpu_model) {
+	case CPU_EZRA_T:
+		version = 3;
+		break;
+	case CPU_NEHEMIAH:
+		version = 0xf;
+		break;
+	default:
+		return;
+	}
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
+	longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf;
+	longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
+	longhaul->bits.EnableSoftBusRatio = 1;
+	longhaul->bits.RevisionKey = 0;
+	local_irq_disable();
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
+	local_irq_enable();
+	__hlt();
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
+	longhaul->bits.EnableSoftBusRatio = 0;
+	longhaul->bits.RevisionKey = version;
+	local_irq_disable();
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
+	local_irq_enable();
+}
+
+/**
+ * longhaul_set_cpu_frequency()
+ * @clock_ratio_index : bitpattern of the new multiplier.
+ *
+ * Sets a new clock ratio.
+ */
+
+static void longhaul_setstate(unsigned int clock_ratio_index)
+{
+	int speed, mult;
+	struct cpufreq_freqs freqs;
+	union msr_longhaul longhaul;
+	union msr_bcr2 bcr2;
+	static unsigned int old_ratio=-1;
+
+	if (old_ratio == clock_ratio_index)
+		return;
+	old_ratio = clock_ratio_index;
+
+	mult = clock_ratio[clock_ratio_index];
+	if (mult == -1)
+		return;
+
+	speed = calc_speed(mult);
+	if ((speed > highest_speed) || (speed < lowest_speed))
+		return;
+
+	freqs.old = calc_speed(longhaul_get_cpu_mult());
+	freqs.new = speed;
+	freqs.cpu = 0; /* longhaul.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+			fsb, mult/10, mult%10, print_speed(speed/1000));
+
+	switch (longhaul_version) {
+
+	/*
+	 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
+	 * Software controlled multipliers only.
+	 *
+	 * *NB* Until we get voltage scaling working v1 & v2 are the same code.
+	 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C]
+	 */
+	case TYPE_LONGHAUL_V1:
+	case TYPE_LONGHAUL_V2:
+		rdmsrl (MSR_VIA_BCR2, bcr2.val);
+		/* Enable software clock multiplier */
+		bcr2.bits.ESOFTBF = 1;
+		bcr2.bits.CLOCKMUL = clock_ratio_index;
+		local_irq_disable();
+		wrmsrl (MSR_VIA_BCR2, bcr2.val);
+		local_irq_enable();
+
+		__hlt();
+
+		/* Disable software clock multiplier */
+		rdmsrl (MSR_VIA_BCR2, bcr2.val);
+		bcr2.bits.ESOFTBF = 0;
+		local_irq_disable();
+		wrmsrl (MSR_VIA_BCR2, bcr2.val);
+		local_irq_enable();
+		break;
+
+	/*
+	 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
+	 * We can scale voltage with this too, but that's currently
+	 * disabled until we come up with a decent 'match freq to voltage'
+	 * algorithm.
+	 * When we add voltage scaling, we will also need to do the
+	 * voltage/freq setting in order depending on the direction
+	 * of scaling (like we do in powernow-k7.c)
+	 * Nehemiah can do FSB scaling too, but this has never been proven
+	 * to work in practice.
+	 */
+	case TYPE_POWERSAVER:
+		do_powersaver(&longhaul, clock_ratio_index);
+		break;
+	}
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+/*
+ * Centaur decided to make life a little more tricky.
+ * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
+ * Samuel2 and above have to try and guess what the FSB is.
+ * We do this by assuming we booted at maximum multiplier, and interpolate
+ * between that value multiplied by possible FSBs and cpu_mhz which
+ * was calculated at boot time. Really ugly, but no other way to do this.
+ */
+
+#define ROUNDING	0xf
+
+static int _guess(int guess)
+{
+	int target;
+
+	target = ((maxmult/10)*guess);
+	if (maxmult%10 != 0)
+		target += (guess/2);
+	target += ROUNDING/2;
+	target &= ~ROUNDING;
+	return target;
+}
+
+
+static int guess_fsb(void)
+{
+	int speed = (cpu_khz/1000);
+	int i;
+	int speeds[3] = { 66, 100, 133 };
+
+	speed += ROUNDING/2;
+	speed &= ~ROUNDING;
+
+	for (i=0; i<3; i++) {
+		if (_guess(speeds[i]) == speed)
+			return speeds[i];
+	}
+	return 0;
+}
+
+
+static int __init longhaul_get_ranges(void)
+{
+	unsigned long invalue;
+	unsigned int multipliers[32]= {
+		50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65,
+		-1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 };
+	unsigned int j, k = 0;
+	union msr_longhaul longhaul;
+	unsigned long lo, hi;
+	unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 };
+	unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 };
+
+	switch (longhaul_version) {
+	case TYPE_LONGHAUL_V1:
+	case TYPE_LONGHAUL_V2:
+		/* Ugh, Longhaul v1 didn't have the min/max MSRs.
+		   Assume min=3.0x & max = whatever we booted at. */
+		minmult = 30;
+		maxmult = longhaul_get_cpu_mult();
+		rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
+		invalue = (lo & (1<<18|1<<19)) >>18;
+		if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2)
+			fsb = eblcr_fsb_table_v1[invalue];
+		else
+			fsb = guess_fsb();
+		break;
+
+	case TYPE_POWERSAVER:
+		/* Ezra-T */
+		if (cpu_model==CPU_EZRA_T) {
+			rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
+			invalue = longhaul.bits.MaxMHzBR;
+			if (longhaul.bits.MaxMHzBR4)
+				invalue += 16;
+			maxmult=multipliers[invalue];
+
+			invalue = longhaul.bits.MinMHzBR;
+			if (longhaul.bits.MinMHzBR4 == 1)
+				minmult = 30;
+			else
+				minmult = multipliers[invalue];
+			fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
+			break;
+		}
+
+		/* Nehemiah */
+		if (cpu_model==CPU_NEHEMIAH) {
+			rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
+
+			/*
+			 * TODO: This code works, but raises a lot of questions.
+			 * - Some Nehemiah's seem to have broken Min/MaxMHzBR's.
+			 *   We get around this by using a hardcoded multiplier of 4.0x
+			 *   for the minimimum speed, and the speed we booted up at for the max.
+			 *   This is done in longhaul_get_cpu_mult() by reading the EBLCR register.
+			 * - According to some VIA documentation EBLCR is only
+			 *   in pre-Nehemiah C3s. How this still works is a mystery.
+			 *   We're possibly using something undocumented and unsupported,
+			 *   But it works, so we don't grumble.
+			 */
+			minmult=40;
+			maxmult=longhaul_get_cpu_mult();
+
+			/* Starting with the 1.2GHz parts, theres a 200MHz bus. */
+			if ((cpu_khz/1000) > 1200)
+				fsb = 200;
+			else
+				fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
+			break;
+		}
+	}
+
+	dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
+		 minmult/10, minmult%10, maxmult/10, maxmult%10);
+
+	if (fsb == -1) {
+		printk (KERN_INFO PFX "Invalid (reserved) FSB!\n");
+		return -EINVAL;
+	}
+
+	highest_speed = calc_speed(maxmult);
+	lowest_speed = calc_speed(minmult);
+	dprintk ("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+		 print_speed(lowest_speed/1000), 
+		 print_speed(highest_speed/1000));
+
+	if (lowest_speed == highest_speed) {
+		printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		return -EINVAL;
+	}
+	if (lowest_speed > highest_speed) {
+		printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+			lowest_speed, highest_speed);
+		return -EINVAL;
+	}
+
+	longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
+	if(!longhaul_table)
+		return -ENOMEM;
+
+	for (j=0; j < numscales; j++) {
+		unsigned int ratio;
+		ratio = clock_ratio[j];
+		if (ratio == -1)
+			continue;
+		if (ratio > maxmult || ratio < minmult)
+			continue;
+		longhaul_table[k].frequency = calc_speed(ratio);
+		longhaul_table[k].index	= j;
+		k++;
+	}
+
+	longhaul_table[k].frequency = CPUFREQ_TABLE_END;
+	if (!k) {
+		kfree (longhaul_table);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static void __init longhaul_setup_voltagescaling(void)
+{
+	union msr_longhaul longhaul;
+
+	rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
+
+	if (!(longhaul.bits.RevisionID & 1))
+		return;
+
+	minvid = longhaul.bits.MinimumVID;
+	maxvid = longhaul.bits.MaximumVID;
+	vrmrev = longhaul.bits.VRMRev;
+
+	if (minvid == 0 || maxvid == 0) {
+		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+					"Voltage scaling disabled.\n",
+					minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+		return;
+	}
+
+	if (minvid == maxvid) {
+		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
+				"both %d.%03d. Voltage scaling disabled\n",
+				maxvid/1000, maxvid%1000);
+		return;
+	}
+
+	if (vrmrev==0) {
+		dprintk ("VRM 8.5 \n");
+		memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
+		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
+	} else {
+		dprintk ("Mobile VRM \n");
+		memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
+		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+	}
+
+	/* Current voltage isn't readable at first, so we need to
+	   set it to a known value. The spec says to use maxvid */
+	longhaul.bits.RevisionKey = longhaul.bits.RevisionID;	/* FIXME: This is bad. */
+	longhaul.bits.EnableSoftVID = 1;
+	longhaul.bits.SoftVID = maxvid;
+	wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
+
+	minvid = voltage_table[minvid];
+	maxvid = voltage_table[maxvid];
+
+	dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
+		maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
+
+	can_scale_voltage = 1;
+}
+
+
+static int longhaul_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, longhaul_table);
+}
+
+
+static int longhaul_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq, unsigned int relation)
+{
+	unsigned int table_index = 0;
+	unsigned int new_clock_ratio = 0;
+
+	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
+		return -EINVAL;
+
+	new_clock_ratio = longhaul_table[table_index].index & 0xFF;
+
+	longhaul_setstate(new_clock_ratio);
+
+	return 0;
+}
+
+
+static unsigned int longhaul_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return calc_speed(longhaul_get_cpu_mult());
+}
+
+
+static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	char *cpuname=NULL;
+	int ret;
+
+	switch (c->x86_model) {
+	case 6:
+		cpu_model = CPU_SAMUEL;
+		cpuname = "C3 'Samuel' [C5A]";
+		longhaul_version = TYPE_LONGHAUL_V1;
+		memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
+		memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
+		break;
+
+	case 7:
+		longhaul_version = TYPE_LONGHAUL_V1;
+		switch (c->x86_mask) {
+		case 0:
+			cpu_model = CPU_SAMUEL2;
+			cpuname = "C3 'Samuel 2' [C5B]";
+			/* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */
+			memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
+			memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr));
+			break;
+		case 1 ... 15:
+			if (c->x86_mask < 8) {
+				cpu_model = CPU_SAMUEL2;
+				cpuname = "C3 'Samuel 2' [C5B]";
+			} else {
+				cpu_model = CPU_EZRA;
+				cpuname = "C3 'Ezra' [C5C]";
+			}
+			memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio));
+			memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr));
+			break;
+		}
+		break;
+
+	case 8:
+		cpu_model = CPU_EZRA_T;
+		cpuname = "C3 'Ezra-T' [C5M]";
+		longhaul_version = TYPE_POWERSAVER;
+		numscales=32;
+		memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
+		memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
+		break;
+
+	case 9:
+		cpu_model = CPU_NEHEMIAH;
+		longhaul_version = TYPE_POWERSAVER;
+		numscales=32;
+		switch (c->x86_mask) {
+		case 0 ... 1:
+			cpuname = "C3 'Nehemiah A' [C5N]";
+			memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio));
+			memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr));
+			break;
+		case 2 ... 4:
+			cpuname = "C3 'Nehemiah B' [C5N]";
+			memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio));
+			memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr));
+			break;
+		case 5 ... 15:
+			cpuname = "C3 'Nehemiah C' [C5N]";
+			memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio));
+			memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr));
+			break;
+		}
+		break;
+
+	default:
+		cpuname = "Unknown";
+		break;
+	}
+
+	printk (KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	switch (longhaul_version) {
+	case TYPE_LONGHAUL_V1:
+	case TYPE_LONGHAUL_V2:
+		printk ("Longhaul v%d supported.\n", longhaul_version);
+		break;
+	case TYPE_POWERSAVER:
+		printk ("Powersaver supported.\n");
+		break;
+	};
+
+	ret = longhaul_get_ranges();
+	if (ret != 0)
+		return ret;
+
+	if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
+		 (dont_scale_voltage==0))
+		longhaul_setup_voltagescaling();
+
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = calc_speed(longhaul_get_cpu_mult());
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
+	if (ret)
+		return ret;
+
+	cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
+
+	return 0;
+}
+
+static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr* longhaul_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver longhaul_driver = {
+	.verify	= longhaul_verify,
+	.target	= longhaul_target,
+	.get	= longhaul_get,
+	.init	= longhaul_cpu_init,
+	.exit	= __devexit_p(longhaul_cpu_exit),
+	.name	= "longhaul",
+	.owner	= THIS_MODULE,
+	.attr	= longhaul_attr,
+};
+
+
+static int __init longhaul_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
+		return -ENODEV;
+
+	switch (c->x86_model) {
+	case 6 ... 9:
+		return cpufreq_register_driver(&longhaul_driver);
+	default:
+		printk (KERN_INFO PFX "Unknown VIA CPU. Contact davej@codemonkey.org.uk\n");
+	}
+
+	return -ENODEV;
+}
+
+
+static void __exit longhaul_exit(void)
+{
+	int i=0;
+
+	for (i=0; i < numscales; i++) {
+		if (clock_ratio[i] == maxmult) {
+			longhaul_setstate(i);
+			break;
+		}
+	}
+
+	cpufreq_unregister_driver(&longhaul_driver);
+	kfree(longhaul_table);
+}
+
+module_param (dont_scale_voltage, int, 0644);
+MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE ("GPL");
+
+module_init(longhaul_init);
+module_exit(longhaul_exit);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
new file mode 100644
index 00000000000..2a495c162ec
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -0,0 +1,466 @@
+/*
+ *  longhaul.h
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  VIA-specific information
+ */
+
+union msr_bcr2 {
+	struct {
+		unsigned Reseved:19,	// 18:0
+		ESOFTBF:1,		// 19
+		Reserved2:3,		// 22:20
+		CLOCKMUL:4,		// 26:23
+		Reserved3:5;		// 31:27
+	} bits;
+	unsigned long val;
+};
+
+union msr_longhaul {
+	struct {
+		unsigned RevisionID:4,	// 3:0
+		RevisionKey:4,		// 7:4
+		EnableSoftBusRatio:1,	// 8
+		EnableSoftVID:1,	// 9
+		EnableSoftBSEL:1,	// 10
+		Reserved:3,		// 11:13
+		SoftBusRatio4:1,	// 14
+		VRMRev:1,		// 15
+		SoftBusRatio:4,		// 19:16
+		SoftVID:5,		// 24:20
+		Reserved2:3,		// 27:25
+		SoftBSEL:2,		// 29:28
+		Reserved3:2,		// 31:30
+		MaxMHzBR:4,		// 35:32
+		MaximumVID:5,		// 40:36
+		MaxMHzFSB:2,		// 42:41
+		MaxMHzBR4:1,		// 43
+		Reserved4:4,		// 47:44
+		MinMHzBR:4,		// 51:48
+		MinimumVID:5,		// 56:52
+		MinMHzFSB:2,		// 58:57
+		MinMHzBR4:1,		// 59
+		Reserved5:4;		// 63:60
+	} bits;
+	unsigned long long val;
+};
+
+/*
+ * Clock ratio tables. Div/Mod by 10 to get ratio.
+ * The eblcr ones specify the ratio read from the CPU.
+ * The clock_ratio ones specify what to write to the CPU.
+ */
+
+/*
+ * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
+ */
+static int __initdata samuel1_clock_ratio[16] = {
+	-1, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	-1, /* 0100 -> RESERVED */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	55, /* 0111 ->  5.5x */
+	60, /* 1000 ->  6.0x */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	50, /* 1011 ->  5.0x */
+	65, /* 1100 ->  6.5x */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	-1, /* 1111 -> RESERVED */
+};
+
+static int __initdata samuel1_eblcr[16] = {
+	50, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	55, /* 0100 ->  5.5x */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	-1, /* 0111 -> RESERVED */
+	-1, /* 1000 -> RESERVED */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	60, /* 1011 ->  6.0x */
+	-1, /* 1100 -> RESERVED */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	65, /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Samuel2 Stepping 1->15
+ */
+static int __initdata samuel2_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	110, /* 0111 -> 11.0x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	130, /* 1110 -> 13.0x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Ezra
+ */
+static int __initdata ezra_clock_ratio[16] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+};
+
+static int __initdata ezra_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 (Ezra-T) [C5M].
+ */
+static int __initdata ezrat_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 ->  12.0x */
+
+	-1,  /* 0000 -> RESERVED (10.0x) */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (9.0x)*/
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	-1,  /* 1111 -> RESERVED (12.0x) */
+};
+
+static int __initdata ezrat_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+
+	-1,  /* 0000 -> RESERVED (9.0x) */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (10.0x)*/
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	-1,  /* 1100 -> RESERVED (12.0x) */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145, /* 1111 -> 14.5x */
+};
+
+/*
+ * VIA C3 Nehemiah */
+ 
+static int __initdata nehemiah_a_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	160, /* 0001 -> 16.0x */
+	-1,  /* 0010 ->  RESERVED */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	-1,  /* 0101 ->  RESERVED */
+	-1,  /* 0110 ->  RESERVED */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+	100, /* 0000 -> 10.0x */
+	-1,  /* 0001 -> RESERVED */
+	120, /* 0010 -> 12.0x */
+	90,  /* 0011 ->  9.0x */
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	120, /* 1111 -> 12.0x */
+};
+
+static int __initdata  nehemiah_b_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	160, /* 0001 -> 16.0x */
+	-1,  /* 0010 ->  RESERVED */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	-1,  /* 0101 ->  RESERVED */
+	-1,  /* 0110 ->  RESERVED */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+	100, /* 0000 -> 10.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	90,  /* 0011 ->  9.0x */
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	120, /* 1111 -> 12.0x */
+};
+
+static int __initdata  nehemiah_c_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	160, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  RESERVED */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  RESERVED */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+	100, /* 0000 -> 10.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	90,  /* 0011 ->  9.0x */
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	120, /* 1111 -> 12.0x */
+};
+
+static int __initdata nehemiah_a_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	160, /* 0001 -> 16.0x */
+	-1,  /* 0010 ->  RESERVED */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	-1,  /* 0101 ->  RESERVED */
+	-1,  /* 0110 ->  RESERVED */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+	90,  /* 0000 ->  9.0x */
+	-1,  /* 0001 -> RESERVED */
+	120, /* 0010 -> 12.0x */
+	100, /* 0011 -> 10.0x */
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	120, /* 1100 -> 12.0x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145 /* 1111 -> 14.5x */
+   /* end of table  */
+};
+static int __initdata nehemiah_b_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	160, /* 0001 -> 16.0x */
+	-1,  /* 0010 ->  RESERVED */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	-1,  /* 0101 ->  RESERVED */
+	-1,  /* 0110 ->  RESERVED */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+	90,  /* 0000 ->  9.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	100, /* 0011 -> 10.0x */
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	120, /* 1100 -> 12.0x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145 /* 1111 -> 14.5x */
+	   /* end of table  */
+};
+static int __initdata nehemiah_c_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	160, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  RESERVED */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  RESERVED */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+	90,  /* 0000 ->  9.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	100, /* 0011 -> 10.0x */
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	120, /* 1100 -> 12.0x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145 /* 1111 -> 14.5x */
+	  /* end of table  */
+};
+
+/* 
+ * Voltage scales. Div/Mod by 1000 to get actual voltage.
+ * Which scale to use depends on the VRM type in use.
+ */
+static int __initdata vrm85scales[32] = {
+	1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
+	1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
+	1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
+	1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+};
+
+static int __initdata mobilevrmscales[32] = {
+	2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+	1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
+	1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+	1075, 1050, 1025, 1000, 975, 950, 925, -1,
+};
+
diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c
new file mode 100644
index 00000000000..e3868de4dc2
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/longrun.c
@@ -0,0 +1,326 @@
+/*
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/timex.h>
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
+
+static struct cpufreq_driver	longrun_driver;
+
+/**
+ * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
+ * values into per cent values. In TMTA microcode, the following is valid:
+ * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int longrun_low_freq, longrun_high_freq;
+
+
+/**
+ * longrun_get_policy - get the current LongRun policy
+ * @policy: struct cpufreq_policy where current policy is written into
+ *
+ * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
+ * and MSR_TMTA_LONGRUN_CTRL
+ */
+static void __init longrun_get_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
+	if (msr_lo & 0x01)
+		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+	else
+		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+	msr_lo &= 0x0000007F;
+	msr_hi &= 0x0000007F;
+
+	if ( longrun_high_freq <= longrun_low_freq ) {
+		/* Assume degenerate Longrun table */
+		policy->min = policy->max = longrun_high_freq;
+	} else {
+		policy->min = longrun_low_freq + msr_lo *
+			((longrun_high_freq - longrun_low_freq) / 100);
+		policy->max = longrun_low_freq + msr_hi *
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+	policy->cpu = 0;
+}
+
+
+/**
+ * longrun_set_policy - sets a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Sets a new CPUFreq policy on LongRun-capable processors. This function
+ * has to be called with cpufreq_driver locked.
+ */
+static int longrun_set_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+	u32 pctg_lo, pctg_hi;
+
+	if (!policy)
+		return -EINVAL;
+
+	if ( longrun_high_freq <= longrun_low_freq ) {
+		/* Assume degenerate Longrun table */
+		pctg_lo = pctg_hi = 100;
+	} else {
+		pctg_lo = (policy->min - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+		pctg_hi = (policy->max - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+
+	if (pctg_hi > 100)
+		pctg_hi = 100;
+	if (pctg_lo > pctg_hi)
+		pctg_lo = pctg_hi;
+
+	/* performance or economy mode */
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFFFE;
+	switch (policy->policy) {
+	case CPUFREQ_POLICY_PERFORMANCE:
+		msr_lo |= 0x00000001;
+		break;
+	case CPUFREQ_POLICY_POWERSAVE:
+		break;
+	}
+	wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+
+	/* lower and upper boundary */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFF80;
+	msr_hi &= 0xFFFFFF80;
+	msr_lo |= pctg_lo;
+	msr_hi |= pctg_hi;
+	wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+	return 0;
+}
+
+
+/**
+ * longrun_verify_poliy - verifies a new CPUFreq policy
+ * @policy: the policy to verify
+ *
+ * Validates a new CPUFreq policy. This function has to be called with
+ * cpufreq_driver locked.
+ */
+static int longrun_verify_policy(struct cpufreq_policy *policy)
+{
+	if (!policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy,
+		policy->cpuinfo.min_freq,
+		policy->cpuinfo.max_freq);
+
+	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
+	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int longrun_get(unsigned int cpu)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (cpu)
+		return 0;
+
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	dprintk("cpuid eax is %u\n", eax);
+
+	return (eax * 1000);
+}
+
+/**
+ * longrun_determine_freqs - determines the lowest and highest possible core frequency
+ * @low_freq: an int to put the lowest frequency into
+ * @high_freq: an int to put the highest frequency into
+ *
+ * Determines the lowest and highest possible core frequencies on this CPU.
+ * This is necessary to calculate the performance percentage according to
+ * TMTA rules:
+ * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
+						   unsigned int *high_freq)
+{
+	u32 msr_lo, msr_hi;
+	u32 save_lo, save_hi;
+	u32 eax, ebx, ecx, edx;
+	u32 try_hi;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (!low_freq || !high_freq)
+		return -EINVAL;
+
+	if (cpu_has(c, X86_FEATURE_LRTI)) {
+		/* if the LongRun Table Interface is present, the
+		 * detection is a bit easier:
+		 * For minimum frequency, read out the maximum
+		 * level (msr_hi), write that into "currently
+		 * selected level", and read out the frequency.
+		 * For maximum frequency, read out level zero.
+		 */
+		/* minimum */
+		rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
+		wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*low_freq = msr_lo * 1000; /* to kHz */
+
+		/* maximum */
+		wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*high_freq = msr_lo * 1000; /* to kHz */
+
+		dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
+
+		if (*low_freq > *high_freq)
+			*low_freq = *high_freq;
+		return 0;
+	}
+
+	/* set the upper border to the value determined during TSC init */
+	*high_freq = (cpu_khz / 1000);
+	*high_freq = *high_freq * 1000;
+	dprintk("high frequency is %u kHz\n", *high_freq);
+
+	/* get current borders */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	save_lo = msr_lo & 0x0000007F;
+	save_hi = msr_hi & 0x0000007F;
+
+	/* if current perf_pctg is larger than 90%, we need to decrease the
+	 * upper limit to make the calculation more accurate.
+	 */
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	/* try decreasing in 10% steps, some processors react only
+	 * on some barrier values */
+	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
+		/* set to 0 to try_hi perf_pctg */
+		msr_lo &= 0xFFFFFF80;
+		msr_hi &= 0xFFFFFF80;
+		msr_lo |= 0;
+		msr_hi |= try_hi;
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+		/* read out current core MHz and current perf_pctg */
+		cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+
+		/* restore values */
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
+	}
+	dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+
+	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+	 * eqals
+	 * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+	 *
+	 * high_freq * perf_pctg is stored tempoarily into "ebx".
+	 */
+	ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
+
+	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
+		return -EIO;
+
+	edx = (eax - ebx) / (100 - ecx);
+	*low_freq = edx * 1000; /* back to kHz */
+
+	dprintk("low frequency is %u kHz\n", *low_freq);
+
+	if (*low_freq > *high_freq)
+		*low_freq = *high_freq;
+
+	return 0;
+}
+
+
+static int __init longrun_cpu_init(struct cpufreq_policy *policy)
+{
+	int result = 0;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* detect low and high frequency */
+	result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
+	if (result)
+		return result;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = longrun_low_freq;
+	policy->cpuinfo.max_freq = longrun_high_freq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	longrun_get_policy(policy);
+
+	return 0;
+}
+
+
+static struct cpufreq_driver longrun_driver = {
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.verify		= longrun_verify_policy,
+	.setpolicy	= longrun_set_policy,
+	.get		= longrun_get,
+	.init		= longrun_cpu_init,
+	.name		= "longrun",
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
+ *
+ * Initializes the LongRun support.
+ */
+static int __init longrun_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
+	    !cpu_has(c, X86_FEATURE_LONGRUN))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&longrun_driver);
+}
+
+
+/**
+ * longrun_exit - unregisters LongRun support
+ */
+static void __exit longrun_exit(void)
+{
+	cpufreq_unregister_driver(&longrun_driver);
+}
+
+
+MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
+MODULE_LICENSE ("GPL");
+
+module_init(longrun_init);
+module_exit(longrun_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
new file mode 100644
index 00000000000..aa622d52c6e
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
@@ -0,0 +1,337 @@
+/*
+ *	Pentium 4/Xeon CPU on demand clock modulation/speed scaling
+ *	(C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Arjan van de Ven <arjanv@redhat.com>
+ *	(C) 2002 Tora T. Engstad
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *	
+ *	Date		Errata			Description
+ *	20020525	N44, O17	12.5% or 25% DC causes lockup
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h> 
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+
+#include <asm/processor.h> 
+#include <asm/msr.h>
+#include <asm/timex.h>
+
+#include "speedstep-lib.h"
+
+#define PFX	"p4-clockmod: "
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+
+/*
+ * Duty Cycle (3bits), note DC_DISABLE is not specified in
+ * intel docs i just use it to mean disable
+ */
+enum {
+	DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
+	DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
+};
+
+#define DC_ENTRIES	8
+
+
+static int has_N44_O17_errata[NR_CPUS];
+static unsigned int stock_freq;
+static struct cpufreq_driver p4clockmod_driver;
+static unsigned int cpufreq_p4_get(unsigned int cpu);
+
+static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
+{
+	u32 l, h;
+
+	if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+		return -EINVAL;
+
+	rdmsr(MSR_IA32_THERM_STATUS, l, h);
+
+	if (l & 0x01)
+		dprintk("CPU#%d currently thermal throttled\n", cpu);
+
+	if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+		newstate = DC_38PT;
+
+	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
+	if (newstate == DC_DISABLE) {
+		dprintk("CPU#%d disabling modulation\n", cpu);
+		wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+	} else {
+		dprintk("CPU#%d setting duty cycle to %d%%\n",
+			cpu, ((125 * newstate) / 10));
+		/* bits 63 - 5	: reserved 
+		 * bit  4	: enable/disable
+		 * bits 3-1	: duty cycle
+		 * bit  0	: reserved
+		 */
+		l = (l & ~14);
+		l = l | (1<<4) | ((newstate & 0x7)<<1);
+		wrmsr(MSR_IA32_THERM_CONTROL, l, h);
+	}
+
+	return 0;
+}
+
+
+static struct cpufreq_frequency_table p4clockmod_table[] = {
+	{DC_RESV, CPUFREQ_ENTRY_INVALID},
+	{DC_DFLT, 0},
+	{DC_25PT, 0},
+	{DC_38PT, 0},
+	{DC_50PT, 0},
+	{DC_64PT, 0},
+	{DC_75PT, 0},
+	{DC_88PT, 0},
+	{DC_DISABLE, 0},
+	{DC_RESV, CPUFREQ_TABLE_END},
+};
+
+
+static int cpufreq_p4_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int    newstate = DC_RESV;
+	struct cpufreq_freqs freqs;
+	cpumask_t cpus_allowed;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = cpufreq_p4_get(policy->cpu);
+	freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
+
+	if (freqs.new == freqs.old)
+		return 0;
+
+	/* notifiers */
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+	 * Developer's Manual, Volume 3 
+	 */
+	cpus_allowed = current->cpus_allowed;
+
+	for_each_cpu_mask(i, policy->cpus) {
+		cpumask_t this_cpu = cpumask_of_cpu(i);
+
+		set_cpus_allowed(current, this_cpu);
+		BUG_ON(smp_processor_id() != i);
+
+		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
+	}
+	set_cpus_allowed(current, cpus_allowed);
+
+	/* notifiers */
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+static int cpufreq_p4_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
+}
+
+
+static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
+{
+	if ((c->x86 == 0x06) && (c->x86_model == 0x09)) {
+		/* Pentium M (Banias) */
+		printk(KERN_WARNING PFX "Warning: Pentium M detected. "
+		       "The speedstep_centrino module offers voltage scaling"
+		       " in addition of frequency scaling. You should use "
+		       "that instead of p4-clockmod, if possible.\n");
+		return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+	}
+
+	if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) {
+		/* Pentium M (Dothan) */
+		printk(KERN_WARNING PFX "Warning: Pentium M detected. "
+		       "The speedstep_centrino module offers voltage scaling"
+		       " in addition of frequency scaling. You should use "
+		       "that instead of p4-clockmod, if possible.\n");
+		/* on P-4s, the TSC runs with constant frequency independent whether
+		 * throttling is active or not. */
+		p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+		return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+	}
+
+	if (c->x86 != 0xF) {
+		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <linux@brodo.de>\n");
+		return 0;
+	}
+
+	/* on P-4s, the TSC runs with constant frequency independent whether
+	 * throttling is active or not. */
+	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
+		       "The speedstep-ich or acpi cpufreq modules offer "
+		       "voltage scaling in addition of frequency scaling. "
+		       "You should use either one instead of p4-clockmod, "
+		       "if possible.\n");
+		return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+	}
+
+	return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+}
+
+ 
+
+static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	int cpuid = 0;
+	unsigned int i;
+
+#ifdef CONFIG_SMP
+	policy->cpus = cpu_sibling_map[policy->cpu];
+#endif
+
+	/* Errata workaround */
+	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+	switch (cpuid) {
+	case 0x0f07:
+	case 0x0f0a:
+	case 0x0f11:
+	case 0x0f12:
+		has_N44_O17_errata[policy->cpu] = 1;
+		dprintk("has errata -- disabling low frequencies\n");
+	}
+	
+	/* get max frequency */
+	stock_freq = cpufreq_p4_get_frequency(c);
+	if (!stock_freq)
+		return -EINVAL;
+
+	/* table init */
+	for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			p4clockmod_table[i].frequency = (stock_freq * i)/8;
+	}
+	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
+	
+	/* cpuinfo and default policy values */
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = 1000000; /* assumed */
+	policy->cur = stock_freq;
+
+	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
+}
+
+
+static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);    
+	return 0;
+}
+
+static unsigned int cpufreq_p4_get(unsigned int cpu)
+{
+	cpumask_t cpus_allowed;
+	u32 l, h;
+
+	cpus_allowed = current->cpus_allowed;
+
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	BUG_ON(smp_processor_id() != cpu);
+
+	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
+
+	set_cpus_allowed(current, cpus_allowed);
+
+	if (l & 0x10) {
+		l = l >> 1;
+		l &= 0x7;
+	} else
+		l = DC_DISABLE;
+
+	if (l != DC_DISABLE)
+		return (stock_freq * l / 8);
+
+	return stock_freq;
+}
+
+static struct freq_attr* p4clockmod_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver p4clockmod_driver = {
+	.verify 	= cpufreq_p4_verify,
+	.target		= cpufreq_p4_target,
+	.init		= cpufreq_p4_cpu_init,
+	.exit		= cpufreq_p4_cpu_exit,
+	.get		= cpufreq_p4_get,
+	.name		= "p4-clockmod",
+	.owner		= THIS_MODULE,
+	.attr		= p4clockmod_attr,
+};
+
+
+static int __init cpufreq_p4_init(void)
+{	
+	struct cpuinfo_x86 *c = cpu_data;
+	int ret;
+
+	/*
+	 * THERM_CONTROL is architectural for IA32 now, so 
+	 * we can rely on the capability checks
+	 */
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
+		!test_bit(X86_FEATURE_ACC, c->x86_capability))
+		return -ENODEV;
+
+	ret = cpufreq_register_driver(&p4clockmod_driver);
+	if (!ret)
+		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+
+	return (ret);
+}
+
+
+static void __exit cpufreq_p4_exit(void)
+{
+	cpufreq_unregister_driver(&p4clockmod_driver);
+}
+
+
+MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE ("GPL");
+
+late_initcall(cpufreq_p4_init);
+module_exit(cpufreq_p4_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
new file mode 100644
index 00000000000..222f8cfe3c5
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
@@ -0,0 +1,256 @@
+/*
+ *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
+ *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne P�nk�l�, Dominik Brodowski.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h> 
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+
+#define POWERNOW_IOPORT 0xfff0         /* it doesn't matter where, as long
+					  as it is unused */
+
+static unsigned int                     busfreq;   /* FSB, in 10 kHz */
+static unsigned int                     max_multiplier;
+
+
+/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
+static struct cpufreq_frequency_table clock_ratio[] = {
+	{45,  /* 000 -> 4.5x */ 0},
+	{50,  /* 001 -> 5.0x */ 0},
+	{40,  /* 010 -> 4.0x */ 0},
+	{55,  /* 011 -> 5.5x */ 0},
+	{20,  /* 100 -> 2.0x */ 0},
+	{30,  /* 101 -> 3.0x */ 0},
+	{60,  /* 110 -> 6.0x */ 0},
+	{35,  /* 111 -> 3.5x */ 0},
+	{0, CPUFREQ_TABLE_END}
+};
+
+
+/**
+ * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
+ *
+ *   Returns the current setting of the frequency multiplier. Core clock
+ * speed is frequency of the Front-Side Bus multiplied with this value.
+ */
+static int powernow_k6_get_cpu_multiplier(void)
+{
+	u64             invalue = 0;
+	u32             msrval;
+	
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue=inl(POWERNOW_IOPORT + 0x8);
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	return clock_ratio[(invalue >> 5)&7].index;
+}
+
+
+/**
+ * powernow_k6_set_state - set the PowerNow! multiplier
+ * @best_i: clock_ratio[best_i] is the target multiplier
+ *
+ *   Tries to change the PowerNow! multiplier
+ */
+static void powernow_k6_set_state (unsigned int best_i)
+{
+	unsigned long           outvalue=0, invalue=0;
+	unsigned long           msrval;
+	struct cpufreq_freqs    freqs;
+
+	if (clock_ratio[best_i].index > max_multiplier) {
+		printk(KERN_ERR "cpufreq: invalid target frequency\n");
+		return;
+	}
+
+	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
+	freqs.new = busfreq * clock_ratio[best_i].index;
+	freqs.cpu = 0; /* powernow-k6.c is UP only driver */
+	
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* we now need to transform best_i to the BVC format, see AMD#23446 */
+
+	outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
+
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue=inl(POWERNOW_IOPORT + 0x8);
+	invalue = invalue & 0xf;
+	outvalue = outvalue | invalue;
+	outl(outvalue ,(POWERNOW_IOPORT + 0x8));
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return;
+}
+
+
+/**
+ * powernow_k6_verify - verifies a new CPUfreq policy
+ * @policy: new policy
+ *
+ * Policy must be within lowest and highest possible CPU Frequency,
+ * and at least one possible state must be within min and max.
+ */
+static int powernow_k6_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
+}
+
+
+/**
+ * powernow_k6_setpolicy - sets a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * sets a new CPUFreq policy
+ */
+static int powernow_k6_target (struct cpufreq_policy *policy,
+			       unsigned int target_freq,
+			       unsigned int relation)
+{
+	unsigned int    newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	powernow_k6_set_state(newstate);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* get frequencies */
+	max_multiplier = powernow_k6_get_cpu_multiplier();
+	busfreq = cpu_khz / max_multiplier;
+
+	/* table init */
+ 	for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if (clock_ratio[i].index > max_multiplier)
+			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = busfreq * max_multiplier;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
+	if (result)
+		return (result);
+
+	cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	for (i=0; i<8; i++) {
+		if (i==max_multiplier)
+			powernow_k6_set_state(i);
+	}
+	cpufreq_frequency_table_put_attr(policy->cpu);
+ 	return 0;
+}
+
+static unsigned int powernow_k6_get(unsigned int cpu)
+{
+	return busfreq * powernow_k6_get_cpu_multiplier();
+}
+
+static struct freq_attr* powernow_k6_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_k6_driver = {
+	.verify 	= powernow_k6_verify,
+	.target 	= powernow_k6_target,
+	.init		= powernow_k6_cpu_init,
+	.exit		= powernow_k6_cpu_exit,
+	.get		= powernow_k6_get,
+	.name		= "powernow-k6",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k6_attr,
+};
+
+
+/**
+ * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
+ *
+ *   Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
+ * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
+ * on success.
+ */
+static int __init powernow_k6_init(void)
+{	
+	struct cpuinfo_x86      *c = cpu_data;
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
+		((c->x86_model != 12) && (c->x86_model != 13)))
+		return -ENODEV;
+
+	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
+		printk("cpufreq: PowerNow IOPORT region already used.\n");
+		return -EIO;
+	}
+
+	if (cpufreq_register_driver(&powernow_k6_driver)) {
+		release_region (POWERNOW_IOPORT, 16);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/**
+ * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
+ *
+ *   Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
+ */
+static void __exit powernow_k6_exit(void)
+{
+	cpufreq_unregister_driver(&powernow_k6_driver);
+	release_region (POWERNOW_IOPORT, 16);
+}
+
+
+MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE ("GPL");
+
+module_init(powernow_k6_init);
+module_exit(powernow_k6_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
new file mode 100644
index 00000000000..913f652623d
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -0,0 +1,690 @@
+/*
+ *  AMD K7 Powernow driver.
+ *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003-2004 Dave Jones <davej@redhat.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
+ * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
+ * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dmi.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#include "powernow-k7.h"
+
+#define PFX "powernow: "
+
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags;
+	u16 settlingtime;
+	u8 reserved1;
+	u8 numpst;
+};
+
+struct pst_s {
+	u32 cpuid;
+	u8 fsbspeed;
+	u8 maxfid;
+	u8 startvid;
+	u8 numpstates;
+};
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+union powernow_acpi_control_t {
+	struct {
+		unsigned long fid:5,
+		vid:5,
+		sgtc:20,
+		res1:2;
+	} bits;
+	unsigned long val;
+};
+#endif
+
+#ifdef CONFIG_CPU_FREQ_DEBUG
+/* divide by 1000 to get VCore voltage in V. */
+static int mobile_vid_table[32] = {
+    2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+    1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
+    1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+    1075, 1050, 1025, 1000, 975, 950, 925, 0,
+};
+#endif
+
+/* divide by 10 to get FID. */
+static int fid_codes[32] = {
+    110, 115, 120, 125, 50, 55, 60, 65,
+    70, 75, 80, 85, 90, 95, 100, 105,
+    30, 190, 40, 200, 130, 135, 140, 210,
+    150, 225, 160, 165, 170, 180, -1, -1,
+};
+
+/* This parameter is used in order to force ACPI instead of legacy method for
+ * configuration purpose.
+ */
+
+static int acpi_force;
+
+static struct cpufreq_frequency_table *powernow_table;
+
+static unsigned int can_scale_bus;
+static unsigned int can_scale_vid;
+static unsigned int minimum_speed=-1;
+static unsigned int maximum_speed;
+static unsigned int number_scales;
+static unsigned int fsb;
+static unsigned int latency;
+static char have_a0;
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
+
+static int check_fsb(unsigned int fsbspeed)
+{
+	int delta;
+	unsigned int f = fsb / 1000;
+
+	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
+	return (delta < 5);
+}
+
+static int check_powernow(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	unsigned int maxei, eax, ebx, ecx, edx;
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
+#ifdef MODULE
+		printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
+#endif
+		return 0;
+	}
+
+	/* Get maximum capabilities */
+	maxei = cpuid_eax (0x80000000);
+	if (maxei < 0x80000007) {	/* Any powernow info ? */
+#ifdef MODULE
+		printk (KERN_INFO PFX "No powernow capabilities detected\n");
+#endif
+		return 0;
+	}
+
+	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+		printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
+		have_a0 = 1;
+	}
+
+	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+	/* Check we can actually do something before we say anything.*/
+	if (!(edx & (1 << 1 | 1 << 2)))
+		return 0;
+
+	printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+
+	if (edx & 1 << 1) {
+		printk ("frequency");
+		can_scale_bus=1;
+	}
+
+	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
+		printk (" and ");
+
+	if (edx & 1 << 2) {
+		printk ("voltage");
+		can_scale_vid=1;
+	}
+
+	printk (".\n");
+	return 1;
+}
+
+
+static int get_ranges (unsigned char *pst)
+{
+	unsigned int j;
+	unsigned int speed;
+	u8 fid, vid;
+
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
+	if (!powernow_table)
+		return -ENOMEM;
+	memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
+
+	for (j=0 ; j < number_scales; j++) {
+		fid = *pst++;
+
+		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
+		powernow_table[j].index = fid; /* lower 8 bits */
+
+		speed = powernow_table[j].frequency;
+
+		if ((fid_codes[fid] % 10)==5) {
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+			if (have_a0 == 1)
+				powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
+#endif
+		}
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+
+		vid = *pst++;
+		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
+
+		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 
+			 fid_codes[fid] % 10, speed/1000, vid,	
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+	}
+	powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
+	powernow_table[number_scales].index = 0;
+
+	return 0;
+}
+
+
+static void change_FID(int fid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.FID != fid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.FID = fid;
+		fidvidctl.bits.VIDC = 0;
+		fidvidctl.bits.FIDC = 1;
+		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_VID(int vid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.VID != vid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.VID = vid;
+		fidvidctl.bits.FIDC = 0;
+		fidvidctl.bits.VIDC = 1;
+		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_speed (unsigned int index)
+{
+	u8 fid, vid;
+	struct cpufreq_freqs freqs;
+	union msr_fidvidstatus fidvidstatus;
+	int cfid;
+
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in powernow_decode_bios,
+	 * vid are the upper 8 bits.
+	 */
+
+	fid = powernow_table[index].index & 0xFF;
+	vid = (powernow_table[index].index & 0xFF00) >> 8;
+
+	freqs.cpu = 0;
+
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+	freqs.old = fsb * fid_codes[cfid] / 10;
+
+	freqs.new = powernow_table[index].frequency;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Now do the magic poking into the MSRs.  */
+
+	if (have_a0 == 1)	/* A0 errata 5 */
+		local_irq_disable();
+
+	if (freqs.old > freqs.new) {
+		/* Going down, so change FID first */
+		change_FID(fid);
+		change_VID(vid);
+	} else {
+		/* Going up, so change VID first */
+		change_VID(vid);
+		change_FID(fid);
+	}
+
+
+	if (have_a0 == 1)
+		local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+
+static struct acpi_processor_performance *acpi_processor_perf;
+
+static int powernow_acpi_init(void)
+{
+	int i;
+	int retval = 0;
+	union powernow_acpi_control_t pc;
+
+	if (acpi_processor_perf != NULL && powernow_table != NULL) {
+		retval = -EINVAL;
+		goto err0;
+	}
+
+	acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
+				      GFP_KERNEL);
+
+	if (!acpi_processor_perf) {
+		retval = -ENOMEM;
+		goto err0;
+	}
+
+	memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
+
+	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
+		retval = -EIO;
+		goto err1;
+	}
+
+	if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	number_scales = acpi_processor_perf->state_count;
+
+	if (number_scales < 2) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
+	if (!powernow_table) {
+		retval = -ENOMEM;
+		goto err2;
+	}
+
+	memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
+
+	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
+	for (i = 0; i < number_scales; i++) {
+		u8 fid, vid;
+		unsigned int speed;
+
+		pc.val = (unsigned long) acpi_processor_perf->states[i].control;
+		dprintk ("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+			 i,
+			 (u32) acpi_processor_perf->states[i].core_frequency,
+			 (u32) acpi_processor_perf->states[i].power,
+			 (u32) acpi_processor_perf->states[i].transition_latency,
+			 (u32) acpi_processor_perf->states[i].control,
+			 pc.bits.sgtc);
+
+		vid = pc.bits.vid;
+		fid = pc.bits.fid;
+
+		powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
+		powernow_table[i].index = fid; /* lower 8 bits */
+		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+
+		speed = powernow_table[i].frequency;
+
+		if ((fid_codes[fid] % 10)==5) {
+			if (have_a0 == 1)
+				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		}
+
+		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 
+			 fid_codes[fid] % 10, speed/1000, vid,	
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+
+		if (latency < pc.bits.sgtc)
+			latency = pc.bits.sgtc;
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+	}
+
+	powernow_table[i].frequency = CPUFREQ_TABLE_END;
+	powernow_table[i].index = 0;
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+err2:
+	acpi_processor_unregister_performance(acpi_processor_perf, 0);
+err1:
+	kfree(acpi_processor_perf);
+err0:
+	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
+	acpi_processor_perf = NULL;
+	return retval;
+}
+#else
+static int powernow_acpi_init(void)
+{
+	printk(KERN_INFO PFX "no support for ACPI processor found."
+	       "  Please recompile your kernel with ACPI processor\n");
+	return -EINVAL;
+}
+#endif
+
+static int powernow_decode_bios (int maxfid, int startvid)
+{
+	struct psb_s *psb;
+	struct pst_s *pst;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int etuple;
+	unsigned int ret;
+
+	etuple = cpuid_eax(0x80000001);
+
+	for (i=0xC0000; i < 0xffff0 ; i+=16) {
+
+		p = phys_to_virt(i);
+
+		if (memcmp(p, "AMDK7PNOW!",  10) == 0){
+			dprintk ("Found PSB header at %p\n", p);
+			psb = (struct psb_s *) p;
+			dprintk ("Table version: 0x%x\n", psb->tableversion);
+			if (psb->tableversion != 0x12) {
+				printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
+				return -ENODEV;
+			}
+
+			dprintk ("Flags: 0x%x\n", psb->flags);
+			if ((psb->flags & 1)==0) {
+				dprintk ("Mobile voltage regulator\n");
+			} else {
+				dprintk ("Desktop voltage regulator\n");
+			}
+
+			latency = psb->settlingtime;
+			if (latency < 100) {
+				printk (KERN_INFO PFX "BIOS set settling time to %d microseconds."
+						"Should be at least 100. Correcting.\n", latency);
+				latency = 100;
+			}
+			dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
+			dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
+
+			p += sizeof (struct psb_s);
+
+			pst = (struct pst_s *) p;
+
+			for (i = 0 ; i <psb->numpst; i++) {
+				pst = (struct pst_s *) p;
+				number_scales = pst->numpstates;
+
+				if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
+				    (maxfid==pst->maxfid) && (startvid==pst->startvid))
+				{
+					dprintk ("PST:%d (@%p)\n", i, pst);
+					dprintk (" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n", 
+						 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+
+					ret = get_ranges ((char *) pst + sizeof (struct pst_s));
+					return ret;
+
+				} else {
+					p = (char *) pst + sizeof (struct pst_s);
+					for (j=0 ; j < number_scales; j++)
+						p+=2;
+				}
+			}
+			printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
+			printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
+
+			return -EINVAL;
+		}
+		p++;
+	}
+
+	return -ENODEV;
+}
+
+
+static int powernow_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate;
+
+	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
+		return -EINVAL;
+
+	change_speed(newstate);
+
+	return 0;
+}
+
+
+static int powernow_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, powernow_table);
+}
+
+/*
+ * We use the fact that the bus frequency is somehow
+ * a multiple of 100000/3 khz, then we compute sgtc according
+ * to this multiple.
+ * That way, we match more how AMD thinks all of that work.
+ * We will then get the same kind of behaviour already tested under
+ * the "well-known" other OS.
+ */
+static int __init fixup_sgtc(void)
+{
+	unsigned int sgtc;
+	unsigned int m;
+
+	m = fsb / 3333;
+	if ((m % 10) >= 5)
+		m += 5;
+
+	m /= 10;
+
+	sgtc = 100 * m * latency;
+	sgtc = sgtc / 3;
+	if (sgtc > 0xfffff) {
+		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+		sgtc = 0xfffff;
+	}
+	return sgtc;
+}
+
+static unsigned int powernow_get(unsigned int cpu)
+{
+	union msr_fidvidstatus fidvidstatus;
+	unsigned int cfid;
+
+	if (cpu)
+		return 0;
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+
+	return (fsb * fid_codes[cfid] / 10);
+}
+
+
+static int __init acer_cpufreq_pst(struct dmi_system_id *d)
+{
+	printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
+	printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+	printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
+	return 0;
+}
+
+/*
+ * Some Athlon laptops have really fucked PST tables.
+ * A BIOS update is all that can save them.
+ * Mention this, and disable cpufreq.
+ */
+static struct dmi_system_id __initdata powernow_dmi_table[] = {
+	{
+		.callback = acer_cpufreq_pst,
+		.ident = "Acer Aspire",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
+			DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
+		},
+	},
+	{ }
+};
+
+static int __init powernow_cpu_init (struct cpufreq_policy *policy)
+{
+	union msr_fidvidstatus fidvidstatus;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+
+	/* A K7 with powernow technology is set to max frequency by BIOS */
+	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID];
+	if (!fsb) {
+		printk(KERN_WARNING PFX "can not determine bus frequency\n");
+		return -EINVAL;
+	}
+	dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000);
+
+	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
+		printk (KERN_INFO PFX "PSB/PST known to be broken.  Trying ACPI instead\n");
+		result = powernow_acpi_init();
+	} else {
+		result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
+		if (result) {
+			printk (KERN_INFO PFX "Trying ACPI perflib\n");
+			maximum_speed = 0;
+			minimum_speed = -1;
+			latency = 0;
+			result = powernow_acpi_init();
+			if (result) {
+				printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
+				printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n");
+			}
+		} else {
+			/* SGTC use the bus clock as timer */
+			latency = fixup_sgtc();
+			printk(KERN_INFO PFX "SGTC: %d\n", latency);
+		}
+	}
+
+	if (result)
+		return result;
+
+	printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+				minimum_speed/1000, maximum_speed/1000);
+
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+	policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
+
+	policy->cur = powernow_get(0);
+
+	cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
+
+	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
+}
+
+static int powernow_cpu_exit (struct cpufreq_policy *policy) {
+	cpufreq_frequency_table_put_attr(policy->cpu);
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	if (acpi_processor_perf) {
+		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		kfree(acpi_processor_perf);
+	}
+#endif
+
+	if (powernow_table)
+		kfree(powernow_table);
+
+	return 0;
+}
+
+static struct freq_attr* powernow_table_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_driver = {
+	.verify	= powernow_verify,
+	.target	= powernow_target,
+	.get	= powernow_get,
+	.init	= powernow_cpu_init,
+	.exit	= powernow_cpu_exit,
+	.name	= "powernow-k7",
+	.owner	= THIS_MODULE,
+	.attr	= powernow_table_attr,
+};
+
+static int __init powernow_init (void)
+{
+	if (check_powernow()==0)
+		return -ENODEV;
+	return cpufreq_register_driver(&powernow_driver);
+}
+
+
+static void __exit powernow_exit (void)
+{
+	cpufreq_unregister_driver(&powernow_driver);
+}
+
+module_param(acpi_force,  int, 0444);
+MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(powernow_init);
+module_exit(powernow_exit);
+
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
new file mode 100644
index 00000000000..f8a63b3664e
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
@@ -0,0 +1,44 @@
+/*
+ *  $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  AMD-specific information
+ *
+ */
+
+union msr_fidvidctl {
+	struct {
+		unsigned FID:5,			// 4:0
+		reserved1:3,	// 7:5
+		VID:5,			// 12:8
+		reserved2:3,	// 15:13
+		FIDC:1,			// 16
+		VIDC:1,			// 17
+		reserved3:2,	// 19:18
+		FIDCHGRATIO:1,	// 20
+		reserved4:11,	// 31-21
+		SGTC:20,		// 32:51
+		reserved5:12;	// 63:52
+	} bits;
+	unsigned long long val;
+};
+
+union msr_fidvidstatus {
+	struct {
+		unsigned CFID:5,			// 4:0
+		reserved1:3,	// 7:5
+		SFID:5,			// 12:8
+		reserved2:3,	// 15:13
+		MFID:5,			// 20:16
+		reserved3:11,	// 31:21
+		CVID:5,			// 36:32
+		reserved4:3,	// 39:37
+		SVID:5,			// 44:40
+		reserved5:3,	// 47:45
+		MVID:5,			// 52:48
+		reserved6:11;	// 63:53
+	} bits;
+	unsigned long long val;
+};
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
new file mode 100644
index 00000000000..a65ff7e32e5
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -0,0 +1,1135 @@
+/*
+ *   (c) 2003, 2004 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Support : paul.devriendt@amd.com
+ *
+ *  Based on the powernow-k7.c module written by Dave Jones.
+ *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2004 Dominik Brodowski <linux@brodo.de>
+ *  (C) 2004 Pavel Machek <pavel@suse.cz>
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ *  Valuable input gratefully received from Dave Jones, Pavel Machek,
+ *  Dominik Brodowski, and others.
+ *  Processor information obtained from Chapter 9 (Power and Thermal Management)
+ *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
+ *  Opteron Processors" available for download from www.amd.com
+ *
+ *  Tables for specific CPUs can be infrerred from
+ *	http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#define PFX "powernow-k8: "
+#define BFX PFX "BIOS error: "
+#define VERSION "version 1.00.09e"
+#include "powernow-k8.h"
+
+/* serialize freq changes  */
+static DECLARE_MUTEX(fidvid_sem);
+
+static struct powernow_k8_data *powernow_data[NR_CPUS];
+
+/* Return a frequency in MHz, given an input fid */
+static u32 find_freq_from_fid(u32 fid)
+{
+	return 800 + (fid * 100);
+}
+
+/* Return a frequency in KHz, given an input fid */
+static u32 find_khz_freq_from_fid(u32 fid)
+{
+	return 1000 * find_freq_from_fid(fid);
+}
+
+/* Return a voltage in miliVolts, given an input vid */
+static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid)
+{
+	return 1550-vid*25;
+}
+
+/* Return the vco fid for an input fid
+ *
+ * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
+ * only from corresponding high fids. This returns "high" fid corresponding to
+ * "low" one.
+ */
+static u32 convert_fid_to_vco_fid(u32 fid)
+{
+	if (fid < HI_FID_TABLE_BOTTOM) {
+		return 8 + (2 * fid);
+	} else {
+		return fid;
+	}
+}
+
+/*
+ * Return 1 if the pending bit is set. Unless we just instructed the processor
+ * to transition to a new state, seeing this bit set is really bad news.
+ */
+static int pending_bit_stuck(void)
+{
+	u32 lo, hi;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
+}
+
+/*
+ * Update the global current fid / vid values from the status msr.
+ * Returns 1 on error.
+ */
+static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
+{
+	u32 lo, hi;
+	u32 i = 0;
+
+	lo = MSR_S_LO_CHANGE_PENDING;
+	while (lo & MSR_S_LO_CHANGE_PENDING) {
+		if (i++ > 0x1000000) {
+			printk(KERN_ERR PFX "detected change pending stuck\n");
+			return 1;
+		}
+		rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	}
+
+	data->currvid = hi & MSR_S_HI_CURRENT_VID;
+	data->currfid = lo & MSR_S_LO_CURRENT_FID;
+
+	return 0;
+}
+
+/* the isochronous relief time */
+static void count_off_irt(struct powernow_k8_data *data)
+{
+	udelay((1 << data->irt) * 10);
+	return;
+}
+
+/* the voltage stabalization time */
+static void count_off_vst(struct powernow_k8_data *data)
+{
+	udelay(data->vstable * VST_UNITS_20US);
+	return;
+}
+
+/* need to init the control msr to a safe value (for each cpu) */
+static void fidvid_msr_init(void)
+{
+	u32 lo, hi;
+	u8 fid, vid;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	vid = hi & MSR_S_HI_CURRENT_VID;
+	fid = lo & MSR_S_LO_CURRENT_FID;
+	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
+	hi = MSR_C_HI_STP_GNT_BENIGN;
+	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+	wrmsr(MSR_FIDVID_CTL, lo, hi);
+}
+
+
+/* write the new fid value along with the other control fields to the msr */
+static int write_new_fid(struct powernow_k8_data *data, u32 fid)
+{
+	u32 lo;
+	u32 savevid = data->currvid;
+
+	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
+		return 1;
+	}
+
+	lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+
+	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
+
+	wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	count_off_irt(data);
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
+		       savevid, data->currvid);
+		return 1;
+	}
+
+	if (fid != data->currfid) {
+		printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+		        data->currfid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Write a new vid to the hardware */
+static int write_new_vid(struct powernow_k8_data *data, u32 vid)
+{
+	u32 lo;
+	u32 savefid = data->currfid;
+
+	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
+		return 1;
+	}
+
+	lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+
+	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+		vid, lo, STOP_GRANT_5NS);
+
+	wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+		       savefid, data->currfid);
+		return 1;
+	}
+
+	if (vid != data->currvid) {
+		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
+				data->currvid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Reduce the vid by the max of step or reqvid.
+ * Decreasing vid codes represent increasing voltages:
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off.
+ */
+static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+{
+	if ((data->currvid - reqvid) > step)
+		reqvid = data->currvid - step;
+
+	if (write_new_vid(data, reqvid))
+		return 1;
+
+	count_off_vst(data);
+
+	return 0;
+}
+
+/* Change the fid and vid, by the 3 phases. */
+static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+{
+	if (core_voltage_pre_transition(data, reqvid))
+		return 1;
+
+	if (core_frequency_transition(data, reqfid))
+		return 1;
+
+	if (core_voltage_post_transition(data, reqvid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
+		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+				smp_processor_id(),
+				reqfid, reqvid, data->currfid, data->currvid);
+		return 1;
+	}
+
+	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 1 - core voltage transition ... setup voltage */
+static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+{
+	u32 rvosteps = data->rvo;
+	u32 savefid = data->currfid;
+
+	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqvid, data->rvo);
+
+	while (data->currvid > reqvid) {
+		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
+			data->currvid, reqvid);
+		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
+			return 1;
+	}
+
+	while ((rvosteps > 0)  && ((data->rvo + data->currvid) > reqvid)) {
+		if (data->currvid == 0) {
+			rvosteps = 0;
+		} else {
+			dprintk("ph1: changing vid for rvo, req 0x%x\n",
+				data->currvid - 1);
+			if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+				return 1;
+			rvosteps--;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+		return 1;
+	}
+
+	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 2 - core frequency transition */
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
+{
+	u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid;
+
+	if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
+			reqfid, data->currfid);
+		return 1;
+	}
+
+	if (data->currfid == reqfid) {
+		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+		return 0;
+	}
+
+	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqfid);
+
+	vcoreqfid = convert_fid_to_vco_fid(reqfid);
+	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+	    : vcoreqfid - vcocurrfid;
+
+	while (vcofiddiff > 2) {
+		if (reqfid > data->currfid) {
+			if (data->currfid > LO_FID_TABLE_TOP) {
+				if (write_new_fid(data, data->currfid + 2)) {
+					return 1;
+				}
+			} else {
+				if (write_new_fid
+				    (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+					return 1;
+				}
+			}
+		} else {
+			if (write_new_fid(data, data->currfid - 2))
+				return 1;
+		}
+
+		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+		    : vcoreqfid - vcocurrfid;
+	}
+
+	if (write_new_fid(data, reqfid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (data->currfid != reqfid) {
+		printk(KERN_ERR PFX
+			"ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+			data->currfid, reqfid);
+		return 1;
+	}
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
+			savevid, data->currvid);
+		return 1;
+	}
+
+	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 3 - core voltage transition flow ... jump to the final vid. */
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+{
+	u32 savefid = data->currfid;
+	u32 savereqvid = reqvid;
+
+	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid);
+
+	if (reqvid != data->currvid) {
+		if (write_new_vid(data, reqvid))
+			return 1;
+
+		if (savefid != data->currfid) {
+			printk(KERN_ERR PFX
+			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
+			       savefid, data->currfid);
+			return 1;
+		}
+
+		if (data->currvid != reqvid) {
+			printk(KERN_ERR PFX
+			       "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+			       reqvid, data->currvid);
+			return 1;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savereqvid != data->currvid) {
+		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
+		return 1;
+	}
+
+	if (savefid != data->currfid) {
+		dprintk("ph3 failed, currfid changed 0x%x\n",
+			data->currfid);
+		return 1;
+	}
+
+	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+static int check_supported_cpu(unsigned int cpu)
+{
+	cpumask_t oldmask = CPU_MASK_ALL;
+	u32 eax, ebx, ecx, edx;
+	unsigned int rc = 0;
+
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	schedule();
+
+	if (smp_processor_id() != cpu) {
+		printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
+		goto out;
+	}
+
+	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		goto out;
+
+	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+	    ((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
+	    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) {
+		printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+		goto out;
+	}
+
+	eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+	if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+		printk(KERN_INFO PFX
+		       "No frequency change capabilities detected\n");
+		goto out;
+	}
+
+	cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+	if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
+		printk(KERN_INFO PFX "Power state transitions not supported\n");
+		goto out;
+	}
+
+	rc = 1;
+
+out:
+	set_cpus_allowed(current, oldmask);
+	schedule();
+	return rc;
+
+}
+
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+{
+	unsigned int j;
+	u8 lastfid = 0xff;
+
+	for (j = 0; j < data->numps; j++) {
+		if (pst[j].vid > LEAST_VID) {
+			printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid);
+			return -EINVAL;
+		}
+		if (pst[j].vid < data->rvo) {	/* vid + rvo >= 0 */
+			printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j);
+			return -ENODEV;
+		}
+		if (pst[j].vid < maxvid + data->rvo) {	/* vid + rvo >= maxvid */
+			printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
+			return -ENODEV;
+		}
+		if ((pst[j].fid > MAX_FID)
+		    || (pst[j].fid & 1)
+		    || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) {
+			/* Only first fid is allowed to be in "low" range */
+			printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
+			return -EINVAL;
+		}
+		if (pst[j].fid < lastfid)
+			lastfid = pst[j].fid;
+	}
+	if (lastfid & 1) {
+		printk(KERN_ERR PFX "lastfid invalid\n");
+		return -EINVAL;
+	}
+	if (lastfid > LO_FID_TABLE_TOP)
+		printk(KERN_INFO PFX  "first fid not from lo freq table\n");
+
+	return 0;
+}
+
+static void print_basics(struct powernow_k8_data *data)
+{
+	int j;
+	for (j = 0; j < data->numps; j++) {
+		if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID)
+			printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j,
+				data->powernow_table[j].index & 0xff,
+				data->powernow_table[j].frequency/1000,
+				data->powernow_table[j].index >> 8,
+				find_millivolts_from_vid(data, data->powernow_table[j].index >> 8));
+	}
+	if (data->batps)
+		printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+}
+
+static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+{
+	struct cpufreq_frequency_table *powernow_table;
+	unsigned int j;
+
+	if (data->batps) {    /* use ACPI support to get full speed on mains power */
+		printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+		data->numps = data->batps;
+	}
+
+	for ( j=1; j<data->numps; j++ ) {
+		if (pst[j-1].fid >= pst[j].fid) {
+			printk(KERN_ERR PFX "PST out of sequence\n");
+			return -EINVAL;
+		}
+	}
+
+	if (data->numps < 2) {
+		printk(KERN_ERR PFX "no p states to transition\n");
+		return -ENODEV;
+	}
+
+	if (check_pst_table(data, pst, maxvid))
+		return -EINVAL;
+
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->numps + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	for (j = 0; j < data->numps; j++) {
+		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
+		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
+		powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+	}
+	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->numps].index = 0;
+
+	if (query_current_values_with_pending_wait(data)) {
+		kfree(powernow_table);
+		return -EIO;
+	}
+
+	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+	data->powernow_table = powernow_table;
+	print_basics(data);
+
+	for (j = 0; j < data->numps; j++)
+		if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+			return 0;
+
+	dprintk("currfid/vid do not match PST, ignoring\n");
+	return 0;
+}
+
+/* Find and validate the PSB/PST table in BIOS. */
+static int find_psb_table(struct powernow_k8_data *data)
+{
+	struct psb_s *psb;
+	unsigned int i;
+	u32 mvs;
+	u8 maxvid;
+	u32 cpst = 0;
+	u32 thiscpuid;
+
+	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
+		/* Scan BIOS looking for the signature. */
+		/* It can not be at ffff0 - it is too big. */
+
+		psb = phys_to_virt(i);
+		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
+			continue;
+
+		dprintk("found PSB header at 0x%p\n", psb);
+
+		dprintk("table vers: 0x%x\n", psb->tableversion);
+		if (psb->tableversion != PSB_VERSION_1_4) {
+			printk(KERN_INFO BFX "PSB table is not v1.4\n");
+			return -ENODEV;
+		}
+
+		dprintk("flags: 0x%x\n", psb->flags1);
+		if (psb->flags1) {
+			printk(KERN_ERR BFX "unknown flags\n");
+			return -ENODEV;
+		}
+
+		data->vstable = psb->vstable;
+		dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+
+		dprintk("flags2: 0x%x\n", psb->flags2);
+		data->rvo = psb->flags2 & 3;
+		data->irt = ((psb->flags2) >> 2) & 3;
+		mvs = ((psb->flags2) >> 4) & 3;
+		data->vidmvs = 1 << mvs;
+		data->batps = ((psb->flags2) >> 6) & 3;
+
+		dprintk("ramp voltage offset: %d\n", data->rvo);
+		dprintk("isochronous relief time: %d\n", data->irt);
+		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+
+		dprintk("numpst: 0x%x\n", psb->num_tables);
+		cpst = psb->num_tables;
+		if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+			if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+				cpst = 1;
+			}
+		}
+		if (cpst != 1) {
+			printk(KERN_ERR BFX "numpst must be 1\n");
+			return -ENODEV;
+		}
+
+		data->plllock = psb->plllocktime;
+		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+		dprintk("maxfid: 0x%x\n", psb->maxfid);
+		dprintk("maxvid: 0x%x\n", psb->maxvid);
+		maxvid = psb->maxvid;
+
+		data->numps = psb->numps;
+		dprintk("numpstates: 0x%x\n", data->numps);
+		return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+	}
+	/*
+	 * If you see this message, complain to BIOS manufacturer. If
+	 * he tells you "we do not support Linux" or some similar
+	 * nonsense, remember that Windows 2000 uses the same legacy
+	 * mechanism that the old Linux PSB driver uses. Tell them it
+	 * is broken with Windows 2000.
+	 *
+	 * The reference to the AMD documentation is chapter 9 in the
+	 * BIOS and Kernel Developer's Guide, which is available on
+	 * www.amd.com
+	 */
+	printk(KERN_ERR PFX "BIOS error - no PSB\n");
+	return -ENODEV;
+}
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+{
+	if (!data->acpi_data.state_count)
+		return;
+
+	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
+{
+	int i;
+	int cntlofreq = 0;
+	struct cpufreq_frequency_table *powernow_table;
+
+	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+		dprintk("register performance failed\n");
+		return -EIO;
+	}
+
+	/* verify the data contained in the ACPI structures */
+	if (data->acpi_data.state_count <= 1) {
+		dprintk("No ACPI P-States\n");
+		goto err_out;
+	}
+
+	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+		(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		dprintk("Invalid control/status registers (%x - %x)\n",
+			data->acpi_data.control_register.space_id,
+			data->acpi_data.status_register.space_id);
+		goto err_out;
+	}
+
+	/* fill in data->powernow_table */
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		dprintk("powernow_table memory alloc failure\n");
+		goto err_out;
+	}
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		u32 fid = data->acpi_data.states[i].control & FID_MASK;
+		u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+
+		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+
+		powernow_table[i].index = fid; /* lower 8 bits */
+		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+		powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+
+		/* verify frequency is OK */
+		if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
+			(powernow_table[i].frequency < (MIN_FREQ * 1000))) {
+			dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+
+		/* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+		if (vid == 0x1f) {
+			dprintk("invalid vid %u, ignoring\n", vid);
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+
+ 		if (fid < HI_FID_TABLE_BOTTOM) {
+ 			if (cntlofreq) {
+ 				/* if both entries are the same, ignore this
+ 				 * one... 
+ 				 */
+ 				if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
+ 				    (powernow_table[i].index != powernow_table[cntlofreq].index)) {
+ 					printk(KERN_ERR PFX "Too many lo freq table entries\n");
+ 					goto err_out_mem;
+ 				}
+				
+ 				dprintk("double low frequency table entry, ignoring it.\n");
+ 				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ 				continue;
+ 			} else
+ 				cntlofreq = i;
+		}
+
+		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
+				powernow_table[i].frequency,
+				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+	}
+
+	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data.state_count].index = 0;
+	data->powernow_table = powernow_table;
+
+	/* fill in data */
+	data->numps = data->acpi_data.state_count;
+	print_basics(data);
+	powernow_k8_acpi_pst_values(data, 0);
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+err_out_mem:
+	kfree(powernow_table);
+
+err_out:
+	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+	data->acpi_data.state_count = 0;
+
+	return -ENODEV;
+}
+
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
+{
+	if (data->acpi_data.state_count)
+		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+}
+
+#else
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
+#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
+
+/* Take a frequency, and issue the fid/vid transition command */
+static int transition_frequency(struct powernow_k8_data *data, unsigned int index)
+{
+	u32 fid;
+	u32 vid;
+	int res;
+	struct cpufreq_freqs freqs;
+
+	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in find_psb_table, vid are 
+	 * the upper 8 bits.
+	 */
+
+	fid = data->powernow_table[index].index & 0xFF;
+	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
+
+	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((data->currvid == vid) && (data->currfid == fid)) {
+		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
+			fid, vid);
+		return 0;
+	}
+
+	if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		printk("ignoring illegal change in lo freq table-%x to 0x%x\n",
+		       data->currfid, fid);
+		return 1;
+	}
+
+	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), fid, vid);
+
+	freqs.cpu = data->cpu;
+
+	freqs.old = find_khz_freq_from_fid(data->currfid);
+	freqs.new = find_khz_freq_from_fid(fid);
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	down(&fidvid_sem);
+	res = transition_fid_vid(data, fid, vid);
+	up(&fidvid_sem);
+
+	freqs.new = find_khz_freq_from_fid(data->currfid);
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return res;
+}
+
+/* Driver entry point to switch to the target frequency */
+static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+{
+	cpumask_t oldmask = CPU_MASK_ALL;
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+	u32 checkfid = data->currfid;
+	u32 checkvid = data->currvid;
+	unsigned int newstate;
+	int ret = -EIO;
+
+	/* only run on specific CPU from here on */
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+	schedule();
+
+	if (smp_processor_id() != pol->cpu) {
+		printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
+		goto err_out;
+	}
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
+		goto err_out;
+	}
+
+	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+		pol->cpu, targfreq, pol->min, pol->max, relation);
+
+	if (query_current_values_with_pending_wait(data)) {
+		ret = -EIO;
+		goto err_out;
+	}
+
+	dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+		data->currfid, data->currvid);
+
+	if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+		printk(KERN_ERR PFX
+		       "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n",
+		       checkfid, data->currfid, checkvid, data->currvid);
+	}
+
+	if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+		goto err_out;
+
+	powernow_k8_acpi_pst_values(data, newstate);
+
+	if (transition_frequency(data, newstate)) {
+		printk(KERN_ERR PFX "transition frequency failed\n");
+		ret = 1;
+		goto err_out;
+	}
+
+	pol->cur = find_khz_freq_from_fid(data->currfid);
+	ret = 0;
+
+err_out:
+	set_cpus_allowed(current, oldmask);
+	schedule();
+
+	return ret;
+}
+
+/* Driver entry point to verify the policy and range of frequencies */
+static int powernowk8_verify(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+
+	return cpufreq_frequency_table_verify(pol, data->powernow_table);
+}
+
+/* per CPU init entry point to the driver */
+static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data;
+	cpumask_t oldmask = CPU_MASK_ALL;
+	int rc;
+
+	if (!check_supported_cpu(pol->cpu))
+		return -ENODEV;
+
+	data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
+		return -ENOMEM;
+	}
+	memset(data,0,sizeof(struct powernow_k8_data));
+
+	data->cpu = pol->cpu;
+
+	if (powernow_k8_cpu_init_acpi(data)) {
+		/*
+		 * Use the PSB BIOS structure. This is only availabe on
+		 * an UP version, and is deprecated by AMD.
+		 */
+
+		if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
+			printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n");
+			kfree(data);
+			return -ENODEV;
+		}
+		if (pol->cpu != 0) {
+			printk(KERN_ERR PFX "init not cpu 0\n");
+			kfree(data);
+			return -ENODEV;
+		}
+		rc = find_psb_table(data);
+		if (rc) {
+			kfree(data);
+			return -ENODEV;
+		}
+	}
+
+	/* only run on specific CPU from here on */
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+	schedule();
+
+	if (smp_processor_id() != pol->cpu) {
+		printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
+		goto err_out;
+	}
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing init, change pending bit set\n");
+		goto err_out;
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		goto err_out;
+
+	fidvid_msr_init();
+
+	/* run on any CPU again */
+	set_cpus_allowed(current, oldmask);
+	schedule();
+
+	pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+	/* Take a crude guess here. 
+	 * That guess was in microseconds, so multiply with 1000 */
+	pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
+	    + (3 * (1 << data->irt) * 10)) * 1000;
+
+	pol->cur = find_khz_freq_from_fid(data->currfid);
+	dprintk("policy current frequency %d kHz\n", pol->cur);
+
+	/* min/max the cpu is capable of */
+	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
+		printk(KERN_ERR PFX "invalid powernow_table\n");
+		powernow_k8_cpu_exit_acpi(data);
+		kfree(data->powernow_table);
+		kfree(data);
+		return -EINVAL;
+	}
+
+	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
+
+	printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+	       data->currfid, data->currvid);
+
+	powernow_data[pol->cpu] = data;
+
+	return 0;
+
+err_out:
+	set_cpus_allowed(current, oldmask);
+	schedule();
+	powernow_k8_cpu_exit_acpi(data);
+
+	kfree(data);
+	return -ENODEV;
+}
+
+static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+
+	if (!data)
+		return -EINVAL;
+
+	powernow_k8_cpu_exit_acpi(data);
+
+	cpufreq_frequency_table_put_attr(pol->cpu);
+
+	kfree(data->powernow_table);
+	kfree(data);
+
+	return 0;
+}
+
+static unsigned int powernowk8_get (unsigned int cpu)
+{
+	struct powernow_k8_data *data = powernow_data[cpu];
+	cpumask_t oldmask = current->cpus_allowed;
+	unsigned int khz = 0;
+
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu) {
+		printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
+		set_cpus_allowed(current, oldmask);
+		return 0;
+	}
+	preempt_disable();
+
+	if (query_current_values_with_pending_wait(data))
+		goto out;
+
+	khz = find_khz_freq_from_fid(data->currfid);
+
+ out:
+	preempt_enable_no_resched();
+	set_cpus_allowed(current, oldmask);
+
+	return khz;
+}
+
+static struct freq_attr* powernow_k8_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver cpufreq_amd64_driver = {
+	.verify = powernowk8_verify,
+	.target = powernowk8_target,
+	.init = powernowk8_cpu_init,
+	.exit = __devexit_p(powernowk8_cpu_exit),
+	.get = powernowk8_get,
+	.name = "powernow-k8",
+	.owner = THIS_MODULE,
+	.attr = powernow_k8_attr,
+};
+
+/* driver entry point for init */
+static int __init powernowk8_init(void)
+{
+	unsigned int i, supported_cpus = 0;
+
+	for (i=0; i<NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		if (check_supported_cpu(i))
+			supported_cpus++;
+	}
+
+	if (supported_cpus == num_online_cpus()) {
+		printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n",
+			supported_cpus);
+		return cpufreq_register_driver(&cpufreq_amd64_driver);
+	}
+
+	return -ENODEV;
+}
+
+/* driver entry point for term */
+static void __exit powernowk8_exit(void)
+{
+	dprintk("exit\n");
+
+	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+}
+
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>");
+MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernowk8_init);
+module_exit(powernowk8_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
new file mode 100644
index 00000000000..63ebc8470f5
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -0,0 +1,176 @@
+/*
+ *  (c) 2003, 2004 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+struct powernow_k8_data {
+	unsigned int cpu;
+
+	u32 numps;  /* number of p-states */
+	u32 batps;  /* number of p-states supported on battery */
+
+	/* these values are constant when the PSB is used to determine
+	 * vid/fid pairings, but are modified during the ->target() call
+	 * when ACPI is used */
+	u32 rvo;     /* ramp voltage offset */
+	u32 irt;     /* isochronous relief time */
+	u32 vidmvs;  /* usable value calculated from mvs */
+	u32 vstable; /* voltage stabilization time, units 20 us */
+	u32 plllock; /* pll lock time, units 1 us */
+
+	/* keep track of the current fid / vid */
+	u32 currvid, currfid;
+
+	/* the powernow_table includes all frequency and vid/fid pairings:
+	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
+	 * frequency is in kHz */
+	struct cpufreq_frequency_table  *powernow_table;
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+	/* the acpi table needs to be kept. it's only available if ACPI was
+	 * used to determine valid frequency/vid/fid states */
+	struct acpi_processor_performance acpi_data;
+#endif
+};
+
+
+/* processor's cpuid instruction support */
+#define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
+#define CPUID_XFAM			0x0ff00000	/* extended family */
+#define CPUID_XFAM_K8			0
+#define CPUID_XMOD			0x000f0000	/* extended model */
+#define CPUID_XMOD_REV_E		0x00020000
+#define CPUID_USE_XFAM_XMOD		0x00000f00
+#define CPUID_GET_MAX_CAPABILITIES	0x80000000
+#define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
+#define P_STATE_TRANSITION_CAPABLE	6
+
+/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For     */
+/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and   */
+/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
+/* the register number is placed in ecx, and the data is returned in edx:eax. */
+
+#define MSR_FIDVID_CTL      0xc0010041
+#define MSR_FIDVID_STATUS   0xc0010042
+
+/* Field definitions within the FID VID Low Control MSR : */
+#define MSR_C_LO_INIT_FID_VID     0x00010000
+#define MSR_C_LO_NEW_VID          0x00001f00
+#define MSR_C_LO_NEW_FID          0x0000002f
+#define MSR_C_LO_VID_SHIFT        8
+
+/* Field definitions within the FID VID High Control MSR : */
+#define MSR_C_HI_STP_GNT_TO       0x000fffff
+
+/* Field definitions within the FID VID Low Status MSR : */
+#define MSR_S_LO_CHANGE_PENDING   0x80000000	/* cleared when completed */
+#define MSR_S_LO_MAX_RAMP_VID     0x1f000000
+#define MSR_S_LO_MAX_FID          0x003f0000
+#define MSR_S_LO_START_FID        0x00003f00
+#define MSR_S_LO_CURRENT_FID      0x0000003f
+
+/* Field definitions within the FID VID High Status MSR : */
+#define MSR_S_HI_MAX_WORKING_VID  0x001f0000
+#define MSR_S_HI_START_VID        0x00001f00
+#define MSR_S_HI_CURRENT_VID      0x0000001f
+#define MSR_C_HI_STP_GNT_BENIGN   0x00000001
+
+/*
+ * There are restrictions frequencies have to follow:
+ * - only 1 entry in the low fid table ( <=1.4GHz )
+ * - lowest entry in the high fid table must be >= 2 * the entry in the
+ *   low fid table
+ * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
+ *   in the low fid table
+ * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid
+ * - lowest frequency must be >= interprocessor hypertransport link speed
+ *   (only applies to MP systems obviously)
+ */
+
+/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
+#define LO_FID_TABLE_TOP     6	/* fid values marking the boundary    */
+#define HI_FID_TABLE_BOTTOM  8	/* between the low and high tables    */
+
+#define LO_VCOFREQ_TABLE_TOP    1400	/* corresponding vco frequency values */
+#define HI_VCOFREQ_TABLE_BOTTOM 1600
+
+#define MIN_FREQ_RESOLUTION  200 /* fids jump by 2 matching freq jumps by 200 */
+
+#define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
+#define LEAST_VID 0x1e	/* Lowest (numerically highest) useful vid value */
+
+#define MIN_FREQ 800	/* Min and max freqs, per spec */
+#define MAX_FREQ 5000
+
+#define INVALID_FID_MASK 0xffffffc1  /* not a valid fid if these bits are set */
+#define INVALID_VID_MASK 0xffffffe0  /* not a valid vid if these bits are set */
+
+#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
+
+#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
+
+#define MAXIMUM_VID_STEPS 1  /* Current cpus only allow a single step of 25mV */
+#define VST_UNITS_20US 20   /* Voltage Stabalization Time is in units of 20us */
+
+/*
+ * Most values of interest are enocoded in a single field of the _PSS
+ * entries: the "control" value.
+ */
+                                                                                                    
+#define IRT_SHIFT      30
+#define RVO_SHIFT      28
+#define PLL_L_SHIFT    20
+#define MVS_SHIFT      18
+#define VST_SHIFT      11
+#define VID_SHIFT       6
+#define IRT_MASK        3
+#define RVO_MASK        3
+#define PLL_L_MASK   0x7f
+#define MVS_MASK        3
+#define VST_MASK     0x7f
+#define VID_MASK     0x1f
+#define FID_MASK     0x3f
+
+
+/*
+ * Version 1.4 of the PSB table. This table is constructed by BIOS and is
+ * to tell the OS's power management driver which VIDs and FIDs are
+ * supported by this particular processor.
+ * If the data in the PSB / PST is wrong, then this driver will program the
+ * wrong values into hardware, which is very likely to lead to a crash.
+ */
+
+#define PSB_ID_STRING      "AMDK7PNOW!"
+#define PSB_ID_STRING_LEN  10
+
+#define PSB_VERSION_1_4  0x14
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags1;
+	u16 vstable;
+	u8 flags2;
+	u8 num_tables;
+	u32 cpuid;
+	u8 plllocktime;
+	u8 maxfid;
+	u8 maxvid;
+	u8 numps;
+};
+
+/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
+struct pst_s {
+	u8 fid;
+	u8 vid;
+};
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
+
+static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
new file mode 100644
index 00000000000..07d5612dc00
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -0,0 +1,715 @@
+/*
+ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
+ * M (part of the Centrino chipset).
+ *
+ * Despite the "SpeedStep" in the name, this is almost entirely unlike
+ * traditional SpeedStep.
+ *
+ * Modelled on speedstep.c
+ *
+ * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
+ *
+ * WARNING WARNING WARNING
+ *
+ * This driver manipulates the PERF_CTL MSR, which is only somewhat
+ * documented.  While it seems to work on my laptop, it has not been
+ * tested anywhere else, and it may not work for you, do strange
+ * things or simply crash.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+#include "speedstep-est-common.h"
+
+#define PFX		"speedstep-centrino: "
+#define MAINTAINER	"Jeremy Fitzhardinge <jeremy@goop.org>"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+
+
+struct cpu_id
+{
+	__u8	x86;            /* CPU family */
+	__u8	x86_model;	/* model */
+	__u8	x86_mask;	/* stepping */
+};
+
+enum {
+	CPU_BANIAS,
+	CPU_DOTHAN_A1,
+	CPU_DOTHAN_A2,
+	CPU_DOTHAN_B0,
+};
+
+static const struct cpu_id cpu_ids[] = {
+	[CPU_BANIAS]	= { 6,  9, 5 },
+	[CPU_DOTHAN_A1]	= { 6, 13, 1 },
+	[CPU_DOTHAN_A2]	= { 6, 13, 2 },
+	[CPU_DOTHAN_B0]	= { 6, 13, 6 },
+};
+#define N_IDS	(sizeof(cpu_ids)/sizeof(cpu_ids[0]))
+
+struct cpu_model
+{
+	const struct cpu_id *cpu_id;
+	const char	*model_name;
+	unsigned	max_freq; /* max clock in kHz */
+
+	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
+};
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+
+/* Operating points for current CPU */
+static struct cpu_model *centrino_model[NR_CPUS];
+static const struct cpu_id *centrino_cpu[NR_CPUS];
+
+static struct cpufreq_driver centrino_driver;
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
+
+/* Computes the correct form for IA32_PERF_CTL MSR for a particular
+   frequency/voltage operating point; frequency in MHz, volts in mV.
+   This is stored as "index" in the structure. */
+#define OP(mhz, mv)							\
+	{								\
+		.frequency = (mhz) * 1000,				\
+		.index = (((mhz)/100) << 8) | ((mv - 700) / 16)		\
+	}
+
+/*
+ * These voltage tables were derived from the Intel Pentium M
+ * datasheet, document 25261202.pdf, Table 5.  I have verified they
+ * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
+ * M.
+ */
+
+/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
+static struct cpufreq_frequency_table banias_900[] =
+{
+	OP(600,  844),
+	OP(800,  988),
+	OP(900, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
+static struct cpufreq_frequency_table banias_1000[] =
+{
+	OP(600,   844),
+	OP(800,   972),
+	OP(900,   988),
+	OP(1000, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
+static struct cpufreq_frequency_table banias_1100[] =
+{
+	OP( 600,  956),
+	OP( 800, 1020),
+	OP( 900, 1100),
+	OP(1000, 1164),
+	OP(1100, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+
+/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
+static struct cpufreq_frequency_table banias_1200[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP( 900, 1020),
+	OP(1000, 1100),
+	OP(1100, 1164),
+	OP(1200, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.30GHz (Banias) */
+static struct cpufreq_frequency_table banias_1300[] =
+{
+	OP( 600,  956),
+	OP( 800, 1260),
+	OP(1000, 1292),
+	OP(1200, 1356),
+	OP(1300, 1388),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.40GHz (Banias) */
+static struct cpufreq_frequency_table banias_1400[] =
+{
+	OP( 600,  956),
+	OP( 800, 1180),
+	OP(1000, 1308),
+	OP(1200, 1436),
+	OP(1400, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.50GHz (Banias) */
+static struct cpufreq_frequency_table banias_1500[] =
+{
+	OP( 600,  956),
+	OP( 800, 1116),
+	OP(1000, 1228),
+	OP(1200, 1356),
+	OP(1400, 1452),
+	OP(1500, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.60GHz (Banias) */
+static struct cpufreq_frequency_table banias_1600[] =
+{
+	OP( 600,  956),
+	OP( 800, 1036),
+	OP(1000, 1164),
+	OP(1200, 1276),
+	OP(1400, 1420),
+	OP(1600, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.70GHz (Banias) */
+static struct cpufreq_frequency_table banias_1700[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP(1000, 1116),
+	OP(1200, 1228),
+	OP(1400, 1308),
+	OP(1700, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+#undef OP
+
+#define _BANIAS(cpuid, max, name)	\
+{	.cpu_id		= cpuid,	\
+	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
+	.max_freq	= (max)*1000,	\
+	.op_points	= banias_##max,	\
+}
+#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
+
+/* CPU models, their operating frequency range, and freq/voltage
+   operating points */
+static struct cpu_model models[] =
+{
+	_BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
+	BANIAS(1000),
+	BANIAS(1100),
+	BANIAS(1200),
+	BANIAS(1300),
+	BANIAS(1400),
+	BANIAS(1500),
+	BANIAS(1600),
+	BANIAS(1700),
+
+	/* NULL model_name is a wildcard */
+	{ &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
+
+	{ NULL, }
+};
+#undef _BANIAS
+#undef BANIAS
+
+static int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpu_model *model;
+
+	for(model = models; model->cpu_id != NULL; model++)
+		if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
+		    (model->model_name == NULL ||
+		     strcmp(cpu->x86_model_id, model->model_name) == 0))
+			break;
+
+	if (model->cpu_id == NULL) {
+		/* No match at all */
+		dprintk(KERN_INFO PFX "no support for CPU model \"%s\": "
+		       "send /proc/cpuinfo to " MAINTAINER "\n",
+		       cpu->x86_model_id);
+		return -ENOENT;
+	}
+
+	if (model->op_points == NULL) {
+		/* Matched a non-match */
+		dprintk(KERN_INFO PFX "no table support for CPU model \"%s\": \n",
+		       cpu->x86_model_id);
+#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+		dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
+#endif
+		return -ENOENT;
+	}
+
+	centrino_model[policy->cpu] = model;
+
+	dprintk("found \"%s\": max frequency: %dkHz\n",
+	       model->model_name, model->max_freq);
+
+	return 0;
+}
+
+#else
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
+
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+{
+	if ((c->x86 == x->x86) &&
+	    (c->x86_model == x->x86_model) &&
+	    (c->x86_mask == x->x86_mask))
+		return 1;
+	return 0;
+}
+
+/* To be called only after centrino_model is initialized */
+static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
+{
+	int i;
+
+	/*
+	 * Extract clock in kHz from PERF_CTL value
+	 * for centrino, as some DSDTs are buggy.
+	 * Ideally, this can be done using the acpi_data structure.
+	 */
+	if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
+	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
+	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+		msr = (msr >> 8) & 0xff;
+		return msr * 100000;
+	}
+
+	if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+		return 0;
+
+	msr &= 0xffff;
+	for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
+		if (msr == centrino_model[cpu]->op_points[i].index)
+			return centrino_model[cpu]->op_points[i].frequency;
+	}
+	if (failsafe)
+		return centrino_model[cpu]->op_points[i-1].frequency;
+	else
+		return 0;
+}
+
+/* Return the current CPU frequency in kHz */
+static unsigned int get_cur_freq(unsigned int cpu)
+{
+	unsigned l, h;
+	unsigned clock_freq;
+	cpumask_t saved_mask;
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu)
+		return 0;
+
+	rdmsr(MSR_IA32_PERF_STATUS, l, h);
+	clock_freq = extract_clock(l, cpu, 0);
+
+	if (unlikely(clock_freq == 0)) {
+		/*
+		 * On some CPUs, we can see transient MSR values (which are
+		 * not present in _PSS), while CPU is doing some automatic
+		 * P-state transition (like TM2). Get the last freq set 
+		 * in PERF_CTL.
+		 */
+		rdmsr(MSR_IA32_PERF_CTL, l, h);
+		clock_freq = extract_clock(l, cpu, 1);
+	}
+
+	set_cpus_allowed(current, saved_mask);
+	return clock_freq;
+}
+
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+
+static struct acpi_processor_performance p;
+
+/*
+ * centrino_cpu_init_acpi - register with ACPI P-States library
+ *
+ * Register with the ACPI P-States library (part of drivers/acpi/processor.c)
+ * in order to determine correct frequency and voltage pairings by reading
+ * the _PSS of the ACPI DSDT or SSDT tables.
+ */
+static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
+{
+	union acpi_object		arg0 = {ACPI_TYPE_BUFFER};
+	u32				arg0_buf[3];
+	struct acpi_object_list		arg_list = {1, &arg0};
+	unsigned long			cur_freq;
+	int				result = 0, i;
+	unsigned int			cpu = policy->cpu;
+
+	/* _PDC settings */
+	arg0.buffer.length = 12;
+	arg0.buffer.pointer = (u8 *) arg0_buf;
+	arg0_buf[0] = ACPI_PDC_REVISION_ID;
+	arg0_buf[1] = 1;
+	arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+
+	p.pdc = &arg_list;
+
+	/* register with ACPI core */
+	if (acpi_processor_register_performance(&p, cpu)) {
+		dprintk(KERN_INFO PFX "obtaining ACPI data failed\n");
+		return -EIO;
+	}
+
+	/* verify the acpi_data */
+	if (p.state_count <= 1) {
+		dprintk("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		dprintk("Invalid control/status registers (%x - %x)\n",
+			p.control_register.space_id, p.status_register.space_id);
+		result = -EIO;
+		goto err_unreg;
+	}
+
+	for (i=0; i<p.state_count; i++) {
+		if (p.states[i].control != p.states[i].status) {
+			dprintk("Different control (%x) and status values (%x)\n",
+				p.states[i].control, p.states[i].status);
+			result = -EINVAL;
+			goto err_unreg;
+		}
+
+		if (!p.states[i].core_frequency) {
+			dprintk("Zero core frequency for state %u\n", i);
+			result = -EINVAL;
+			goto err_unreg;
+		}
+
+		if (p.states[i].core_frequency > p.states[0].core_frequency) {
+			dprintk("P%u has larger frequency (%u) than P0 (%u), skipping\n", i,
+				p.states[i].core_frequency, p.states[0].core_frequency);
+			p.states[i].core_frequency = 0;
+			continue;
+		}
+	}
+
+	centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
+	if (!centrino_model[cpu]) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+	memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
+
+	centrino_model[cpu]->model_name=NULL;
+	centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
+	centrino_model[cpu]->op_points =  kmalloc(sizeof(struct cpufreq_frequency_table) *
+					     (p.state_count + 1), GFP_KERNEL);
+        if (!centrino_model[cpu]->op_points) {
+                result = -ENOMEM;
+                goto err_kfree;
+        }
+
+        for (i=0; i<p.state_count; i++) {
+		centrino_model[cpu]->op_points[i].index = p.states[i].control;
+		centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000;
+		dprintk("adding state %i with frequency %u and control value %04x\n", 
+			i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
+	}
+	centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END;
+
+	cur_freq = get_cur_freq(cpu);
+
+	for (i=0; i<p.state_count; i++) {
+		if (!p.states[i].core_frequency) {
+			dprintk("skipping state %u\n", i);
+			centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+		
+		if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) !=
+		    (centrino_model[cpu]->op_points[i].frequency)) {
+			dprintk("Invalid encoded frequency (%u vs. %u)\n",
+				extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0),
+				centrino_model[cpu]->op_points[i].frequency);
+			result = -EINVAL;
+			goto err_kfree_all;
+		}
+
+		if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
+			p.state = i;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+ err_kfree_all:
+	kfree(centrino_model[cpu]->op_points);
+ err_kfree:
+	kfree(centrino_model[cpu]);
+ err_unreg:
+	acpi_processor_unregister_performance(&p, cpu);
+	dprintk(KERN_INFO PFX "invalid ACPI data\n");
+	return (result);
+}
+#else
+static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
+#endif
+
+static int centrino_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	unsigned freq;
+	unsigned l, h;
+	int ret;
+	int i;
+
+	/* Only Intel makes Enhanced Speedstep-capable CPUs */
+	if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	for (i = 0; i < N_IDS; i++)
+		if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
+			break;
+
+	if (i != N_IDS)
+		centrino_cpu[policy->cpu] = &cpu_ids[i];
+
+	if (is_const_loops_cpu(policy->cpu)) {
+		centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
+	}
+
+	if (centrino_cpu_init_acpi(policy)) {
+		if (policy->cpu != 0)
+			return -ENODEV;
+
+		if (!centrino_cpu[policy->cpu]) {
+			dprintk(KERN_INFO PFX "found unsupported CPU with "
+			"Enhanced SpeedStep: send /proc/cpuinfo to "
+			MAINTAINER "\n");
+			return -ENODEV;
+		}
+
+		if (centrino_cpu_init_table(policy)) {
+			return -ENODEV;
+		}
+	}
+
+	/* Check to see if Enhanced SpeedStep is enabled, and try to
+	   enable it if not. */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+	if (!(l & (1<<16))) {
+		l |= (1<<16);
+		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
+		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+		/* check to see if it stuck */
+		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+		if (!(l & (1<<16))) {
+			printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
+			return -ENODEV;
+		}
+	}
+
+	freq = get_cur_freq(policy->cpu);
+
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
+	policy->cur = freq;
+
+	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
+	if (ret)
+		return (ret);
+
+	cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
+
+	return 0;
+}
+
+static int centrino_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+
+	if (!centrino_model[cpu])
+		return -ENODEV;
+
+	cpufreq_frequency_table_put_attr(cpu);
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+	if (!centrino_model[cpu]->model_name) {
+		dprintk("unregistering and freeing ACPI data\n");
+		acpi_processor_unregister_performance(&p, cpu);
+		kfree(centrino_model[cpu]->op_points);
+		kfree(centrino_model[cpu]);
+	}
+#endif
+
+	centrino_model[cpu] = NULL;
+
+	return 0;
+}
+
+/**
+ * centrino_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within this model's frequency range at least one
+ * border included.
+ */
+static int centrino_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
+}
+
+/**
+ * centrino_setpolicy - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int centrino_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int    newstate = 0;
+	unsigned int	msr, oldmsr, h, cpu = policy->cpu;
+	struct cpufreq_freqs	freqs;
+	cpumask_t		saved_mask;
+	int			retval;
+
+	if (centrino_model[cpu] == NULL)
+		return -ENODEV;
+
+	/*
+	 * Support for SMP systems.
+	 * Make sure we are running on the CPU that wants to change frequency
+	 */
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, policy->cpus);
+	if (!cpu_isset(smp_processor_id(), policy->cpus)) {
+		dprintk("couldn't limit to CPUs in this domain\n");
+		return(-EAGAIN);
+	}
+
+	if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq,
+					   relation, &newstate)) {
+		retval = -EINVAL;
+		goto migrate_end;
+	}
+
+	msr = centrino_model[cpu]->op_points[newstate].index;
+	rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+
+	if (msr == (oldmsr & 0xffff)) {
+		retval = 0;
+		dprintk("no change needed - msr was and needs to be %x\n", oldmsr);
+		goto migrate_end;
+	}
+
+	freqs.cpu = cpu;
+	freqs.old = extract_clock(oldmsr, cpu, 0);
+	freqs.new = extract_clock(msr, cpu, 0);
+
+	dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
+		target_freq, freqs.old, freqs.new, msr);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* all but 16 LSB are "reserved", so treat them with
+	   care */
+	oldmsr &= ~0xffff;
+	msr &= 0xffff;
+	oldmsr |= msr;
+
+	wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	retval = 0;
+migrate_end:
+	set_cpus_allowed(current, saved_mask);
+	return (retval);
+}
+
+static struct freq_attr* centrino_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver centrino_driver = {
+	.name		= "centrino", /* should be speedstep-centrino,
+					 but there's a 16 char limit */
+	.init		= centrino_cpu_init,
+	.exit		= centrino_cpu_exit,
+	.verify		= centrino_verify,
+	.target		= centrino_target,
+	.get		= get_cur_freq,
+	.attr           = centrino_attr,
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
+ *
+ * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
+ * unsupported devices, -ENOENT if there's no voltage table for this
+ * particular CPU model, -EINVAL on problems during initiatization,
+ * and zero on success.
+ *
+ * This is quite picky.  Not only does the CPU have to advertise the
+ * "est" flag in the cpuid capability flags, we look for a specific
+ * CPU model and stepping, and we need to have the exact model name in
+ * our voltage tables.  That is, be paranoid about not releasing
+ * someone's valuable magic smoke.
+ */
+static int __init centrino_init(void)
+{
+	struct cpuinfo_x86 *cpu = cpu_data;
+
+	if (!cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&centrino_driver);
+}
+
+static void __exit centrino_exit(void)
+{
+	cpufreq_unregister_driver(&centrino_driver);
+}
+
+MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
+MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(centrino_init);
+module_exit(centrino_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
new file mode 100644
index 00000000000..5ce995c9d86
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
@@ -0,0 +1,25 @@
+/*
+ * Routines common for drivers handling Enhanced Speedstep Technology
+ *  Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2 -- see
+ *  COPYING for details.
+ */
+
+static inline int is_const_loops_cpu(unsigned int cpu)
+{
+	struct cpuinfo_x86 	*c = cpu_data + cpu;
+
+	if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST))
+		return 0;
+
+	/*
+	 * on P-4s, the TSC runs with constant frequency independent of cpu freq
+	 * when we use EST
+	 */
+	if (c->x86 == 0xf)
+		return 1;
+
+	return 0;
+}
+
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
new file mode 100644
index 00000000000..5b7d18a06af
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
@@ -0,0 +1,424 @@
+/*
+ * (C) 2001  Dave Jones, Arjan van de ven.
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information, and on Intel documentation
+ *  for chipsets ICH2-M and ICH3-M.
+ *
+ *  Many thanks to Ducrot Bruno for finding and fixing the last
+ *  "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
+ *  for extensive testing.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "speedstep-lib.h"
+
+
+/* speedstep_chipset:
+ *   It is necessary to know which chipset is used. As accesses to
+ * this device occur at various places in this module, we need a
+ * static struct pci_dev * pointing to that device.
+ */
+static struct pci_dev *speedstep_chipset_dev;
+
+
+/* speedstep_processor
+ */
+static unsigned int speedstep_processor = 0;
+
+
+/*
+ *   There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH,	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ *   Tries to change the SpeedStep state.
+ */
+static void speedstep_set_state (unsigned int state)
+{
+	u32 pmbase;
+	u8 pm2_blk;
+	u8 value;
+	unsigned long flags;
+
+	if (!speedstep_chipset_dev || (state > 0x1))
+		return;
+
+	/* get PMBASE */
+	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
+	if (!(pmbase & 0x01)) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return;
+	}
+
+	pmbase &= 0xFFFFFFFE;
+	if (!pmbase) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return;
+	}
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	/* read state */
+	value = inb(pmbase + 0x50);
+
+	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	/* write new state */
+	value &= 0xFE;
+	value |= state;
+
+	dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+
+	/* Disable bus master arbitration */
+	pm2_blk = inb(pmbase + 0x20);
+	pm2_blk |= 0x01;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* Actual transition */
+	outb(value, (pmbase + 0x50));
+
+	/* Restore bus master arbitration */
+	pm2_blk &= 0xfe;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* check if transition was successful */
+	value = inb(pmbase + 0x50);
+
+	/* Enable IRQs */
+	local_irq_restore(flags);
+
+	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	if (state == (value & 0x1)) {
+		dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
+	} else {
+		printk (KERN_ERR "cpufreq: change failed - I/O error\n");
+	}
+
+	return;
+}
+
+
+/**
+ * speedstep_activate - activate SpeedStep control in the chipset
+ *
+ *   Tries to activate the SpeedStep status and control registers.
+ * Returns -EINVAL on an unsupported chipset, and zero on success.
+ */
+static int speedstep_activate (void)
+{
+	u16 value = 0;
+
+	if (!speedstep_chipset_dev)
+		return -EINVAL;
+
+	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
+	if (!(value & 0x08)) {
+		value |= 0x08;
+		dprintk("activating SpeedStep (TM) registers\n");
+		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
+ *
+ *   Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
+ * the LPC bridge / PM module which contains all power-management
+ * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
+ * chipset, or zero on failure.
+ */
+static unsigned int speedstep_detect_chipset (void)
+{
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801DB_12,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 4; /* 4-M */
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801CA_12,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 3; /* 3-M */
+
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801BA_10,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev) {
+		/* speedstep.c causes lockups on Dell Inspirons 8000 and
+		 * 8100 which use a pretty old revision of the 82815
+		 * host brige. Abort on these systems.
+		 */
+		static struct pci_dev *hostbridge;
+		u8 rev = 0;
+
+		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82815_MC,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+
+		if (!hostbridge)
+			return 2; /* 2-M */
+
+		pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev);
+		if (rev < 5) {
+			dprintk("hostbridge does not support speedstep\n");
+			speedstep_chipset_dev = NULL;
+			pci_dev_put(hostbridge);
+			return 0;
+		}
+
+		pci_dev_put(hostbridge);
+		return 2; /* 2-M */
+	}
+
+	return 0;
+}
+
+static unsigned int _speedstep_get(cpumask_t cpus)
+{
+	unsigned int speed;
+	cpumask_t cpus_allowed;
+
+	cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed(current, cpus);
+	speed = speedstep_get_processor_frequency(speedstep_processor);
+	set_cpus_allowed(current, cpus_allowed);
+	dprintk("detected %u kHz as current frequency\n", speed);
+	return speed;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	return _speedstep_get(cpumask_of_cpu(cpu));
+}
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int speedstep_target (struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+	cpumask_t cpus_allowed;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = _speedstep_get(policy->cpus);
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = policy->cpu;
+
+	dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+
+	/* no transition necessary */
+	if (freqs.old == freqs.new)
+		return 0;
+
+	cpus_allowed = current->cpus_allowed;
+
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	/* switch to physical CPU where state is to be changed */
+	set_cpus_allowed(current, policy->cpus);
+
+	speedstep_set_state(newstate);
+
+	/* allow to be run on all CPUs */
+	set_cpus_allowed(current, cpus_allowed);
+
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result = 0;
+	unsigned int speed;
+	cpumask_t cpus_allowed;
+
+	/* only run on CPU to be set, or on its sibling */
+#ifdef CONFIG_SMP
+	policy->cpus = cpu_sibling_map[policy->cpu];
+#endif
+
+	cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed(current, policy->cpus);
+
+	/* detect low and high frequency */
+	result = speedstep_get_freqs(speedstep_processor,
+				     &speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				     &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+				     &speedstep_set_state);
+	set_cpus_allowed(current, cpus_allowed);
+	if (result)
+		return result;
+
+	/* get current speed setting */
+	speed = _speedstep_get(policy->cpus);
+	if (!speed)
+		return -EIO;
+
+	dprintk("currently at %s speed setting - %i MHz\n",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return (result);
+
+        cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr* speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver speedstep_driver = {
+	.name	= "speedstep-ich",
+	.verify	= speedstep_verify,
+	.target	= speedstep_target,
+	.init	= speedstep_cpu_init,
+	.exit	= speedstep_cpu_exit,
+	.get	= speedstep_get,
+	.owner	= THIS_MODULE,
+	.attr	= speedstep_attr,
+};
+
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	/* detect processor */
+	speedstep_processor = speedstep_detect_processor();
+	if (!speedstep_processor) {
+		dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+		return -ENODEV;
+	}
+
+	/* detect chipset */
+	if (!speedstep_detect_chipset()) {
+		dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+		return -ENODEV;
+	}
+
+	/* activate speedstep support */
+	if (speedstep_activate()) {
+		pci_dev_put(speedstep_chipset_dev);
+		return -EINVAL;
+	}
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	pci_dev_put(speedstep_chipset_dev);
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
+MODULE_LICENSE ("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
new file mode 100644
index 00000000000..8ba430a9c3a
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
@@ -0,0 +1,385 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h> 
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <asm/msr.h>
+#include "speedstep-lib.h"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+static int relaxed_check = 0;
+#else
+#define relaxed_check 0
+#endif
+
+/*********************************************************************
+ *                   GET PROCESSOR CORE SPEED IN KHZ                 *
+ *********************************************************************/
+
+static unsigned int pentium3_get_frequency (unsigned int processor)
+{
+        /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+	struct {
+		unsigned int ratio;	/* Frequency Multiplier (x10) */
+		u8 bitmap;	        /* power on configuration bits
+					   [27, 25:22] (in MSR 0x2a) */
+	} msr_decode_mult [] = {
+		{ 30, 0x01 },
+		{ 35, 0x05 },
+		{ 40, 0x02 },
+		{ 45, 0x06 },
+		{ 50, 0x00 },
+		{ 55, 0x04 },
+		{ 60, 0x0b },
+		{ 65, 0x0f },
+		{ 70, 0x09 },
+		{ 75, 0x0d },
+		{ 80, 0x0a },
+		{ 85, 0x26 },
+		{ 90, 0x20 },
+		{ 100, 0x2b },
+		{ 0, 0xff }     /* error or unknown value */
+	};
+
+	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
+	struct {
+		unsigned int value;     /* Front Side Bus speed in MHz */
+		u8 bitmap;              /* power on configuration bits [18: 19]
+					   (in MSR 0x2a) */
+	} msr_decode_fsb [] = {
+		{  66, 0x0 },
+		{ 100, 0x2 },
+		{ 133, 0x1 },
+		{   0, 0xff}
+	};
+
+	u32     msr_lo, msr_tmp;
+	int     i = 0, j = 0;
+
+	/* read MSR 0x2a - we only need the low 32 bits */
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	msr_tmp = msr_lo;
+
+	/* decode the FSB */
+	msr_tmp &= 0x00c0000;
+	msr_tmp >>= 18;
+	while (msr_tmp != msr_decode_fsb[i].bitmap) {
+		if (msr_decode_fsb[i].bitmap == 0xff)
+			return 0;
+		i++;
+	}
+
+	/* decode the multiplier */
+	if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
+		dprintk("workaround for early PIIIs\n");
+		msr_lo &= 0x03c00000;
+	} else
+		msr_lo &= 0x0bc00000;
+	msr_lo >>= 22;
+	while (msr_lo != msr_decode_mult[j].bitmap) {
+		if (msr_decode_mult[j].bitmap == 0xff)
+			return 0;
+		j++;
+	}
+
+	dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+
+	return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
+}
+
+
+static unsigned int pentiumM_get_frequency(void)
+{
+	u32     msr_lo, msr_tmp;
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+
+	/* see table B-2 of 24547212.pdf */
+	if (msr_lo & 0x00040000) {
+		printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
+		return 0;
+	}
+
+	msr_tmp = (msr_lo >> 22) & 0x1f;
+	dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
+
+	return (msr_tmp * 100 * 1000);
+}
+
+
+static unsigned int pentium4_get_frequency(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 msr_lo, msr_hi, mult;
+	unsigned int fsb = 0;
+
+	rdmsr(0x2c, msr_lo, msr_hi);
+
+	dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+
+	/* decode the FSB: see IA-32 Intel (C) Architecture Software 
+	 * Developer's Manual, Volume 3: System Prgramming Guide,
+	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
+	 * Intel Xeon Processors, on page B-4 and B-5.
+	 */
+	if (c->x86_model < 2)
+		fsb = 100 * 1000;
+	else {
+		u8 fsb_code = (msr_lo >> 16) & 0x7;
+		switch (fsb_code) {
+		case 0:
+			fsb = 100 * 1000;
+			break;
+		case 1:
+			fsb = 13333 * 10;
+			break;
+		case 2:
+			fsb = 200 * 1000;
+			break;
+		}
+	}
+
+	if (!fsb)
+		printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
+
+	/* Multiplier. */
+	if (c->x86_model < 2)
+		mult = msr_lo >> 27;
+	else
+		mult = msr_lo >> 24;
+
+	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
+
+	return (fsb * mult);
+}
+
+ 
+unsigned int speedstep_get_processor_frequency(unsigned int processor)
+{
+	switch (processor) {
+	case SPEEDSTEP_PROCESSOR_PM:
+		return pentiumM_get_frequency();
+	case SPEEDSTEP_PROCESSOR_P4D:
+	case SPEEDSTEP_PROCESSOR_P4M:
+		return pentium4_get_frequency();
+	case SPEEDSTEP_PROCESSOR_PIII_T:
+	case SPEEDSTEP_PROCESSOR_PIII_C:
+	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+		return pentium3_get_frequency(processor);
+	default:
+		return 0;
+	};
+	return 0;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
+
+
+/*********************************************************************
+ *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
+ *********************************************************************/
+
+unsigned int speedstep_detect_processor (void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	u32			ebx, msr_lo, msr_hi;
+
+	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
+
+	if ((c->x86_vendor != X86_VENDOR_INTEL) || 
+	    ((c->x86 != 6) && (c->x86 != 0xF)))
+		return 0;
+
+	if (c->x86 == 0xF) {
+		/* Intel Mobile Pentium 4-M
+		 * or Intel Mobile Pentium 4 with 533 MHz FSB */
+		if (c->x86_model != 2)
+			return 0;
+
+		ebx = cpuid_ebx(0x00000001);
+		ebx &= 0x000000FF;
+
+		dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+
+		switch (c->x86_mask) {
+		case 4: 
+			/*
+			 * B-stepping [M-P4-M] 
+			 * sample has ebx = 0x0f, production has 0x0e.
+			 */
+			if ((ebx == 0x0e) || (ebx == 0x0f))
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		case 7: 
+			/*
+			 * C-stepping [M-P4-M]
+			 * needs to have ebx=0x0e, else it's a celeron:
+			 * cf. 25130917.pdf / page 7, footnote 5 even
+			 * though 25072120.pdf / page 7 doesn't say
+			 * samples are only of B-stepping...
+			 */
+			if (ebx == 0x0e)
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		case 9:
+			/*
+			 * D-stepping [M-P4-M or M-P4/533]
+			 *
+			 * this is totally strange: CPUID 0x0F29 is
+			 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
+			 * The latter need to be sorted out as they don't
+			 * support speedstep.
+			 * Celerons with CPUID 0x0F29 may have either
+			 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
+			 * specific.
+			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
+			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
+			 * also, M-P4M HTs have ebx=0x8, too
+			 * For now, they are distinguished by the model_id string
+			 */
+		        if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) 
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		default:
+			break;
+		}
+		return 0;
+	}
+
+	switch (c->x86_model) {
+	case 0x0B: /* Intel PIII [Tualatin] */
+		/* cpuid_ebx(1) is 0x04 for desktop PIII, 
+		                   0x06 for mobile PIII-M */
+		ebx = cpuid_ebx(0x00000001);
+		dprintk("ebx is %x\n", ebx);
+
+		ebx &= 0x000000FF;
+
+		if (ebx != 0x06)
+			return 0;
+
+		/* So far all PIII-M processors support SpeedStep. See
+		 * Intel's 24540640.pdf of June 2003 
+		 */
+
+		return SPEEDSTEP_PROCESSOR_PIII_T;
+
+	case 0x08: /* Intel PIII [Coppermine] */
+
+		/* all mobile PIII Coppermines have FSB 100 MHz
+		 * ==> sort out a few desktop PIIIs. */
+		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
+		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
+		msr_lo &= 0x00c0000;
+		if (msr_lo != 0x0080000)
+			return 0;
+
+		/*
+		 * If the processor is a mobile version,
+		 * platform ID has bit 50 set
+		 * it has SpeedStep technology if either
+		 * bit 56 or 57 is set
+		 */
+		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
+		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
+		if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+			if (c->x86_mask == 0x01) {
+				dprintk("early PIII version\n");
+				return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
+			} else
+				return SPEEDSTEP_PROCESSOR_PIII_C;
+		}
+
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(speedstep_detect_processor);
+
+
+/*********************************************************************
+ *                     DETECT SPEEDSTEP SPEEDS                       *
+ *********************************************************************/
+
+unsigned int speedstep_get_freqs(unsigned int processor,
+				  unsigned int *low_speed,
+				  unsigned int *high_speed,
+				  void (*set_state) (unsigned int state))
+{
+	unsigned int prev_speed;
+	unsigned int ret = 0;
+	unsigned long flags;
+
+	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
+		return -EINVAL;
+
+	dprintk("trying to determine both speeds\n");
+
+	/* get current speed */
+	prev_speed = speedstep_get_processor_frequency(processor);
+	if (!prev_speed)
+		return -EIO;
+
+	dprintk("previous seped is %u\n", prev_speed);
+	
+	local_irq_save(flags);
+
+	/* switch to low state */
+	set_state(SPEEDSTEP_LOW);
+	*low_speed = speedstep_get_processor_frequency(processor);
+	if (!*low_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	dprintk("low seped is %u\n", *low_speed);
+
+	/* switch to high state */
+	set_state(SPEEDSTEP_HIGH);
+	*high_speed = speedstep_get_processor_frequency(processor);
+	if (!*high_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	dprintk("high seped is %u\n", *high_speed);
+
+	if (*low_speed == *high_speed) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* switch to previous state, if necessary */
+	if (*high_speed != prev_speed)
+		set_state(SPEEDSTEP_LOW);
+
+ out:
+	local_irq_restore(flags);
+	return (ret);
+}
+EXPORT_SYMBOL_GPL(speedstep_get_freqs);
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+module_param(relaxed_check, int, 0444);
+MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
+#endif
+
+MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE ("GPL");
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
new file mode 100644
index 00000000000..261a2c9b7f6
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
@@ -0,0 +1,47 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+
+/* processors */
+
+#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY	0x00000001  /* Coppermine core */
+#define SPEEDSTEP_PROCESSOR_PIII_C		0x00000002  /* Coppermine core */
+#define SPEEDSTEP_PROCESSOR_PIII_T 		0x00000003  /* Tualatin core */
+#define SPEEDSTEP_PROCESSOR_P4M			0x00000004  /* P4-M  */
+
+/* the following processors are not speedstep-capable and are not auto-detected
+ * in speedstep_detect_processor(). However, their speed can be detected using
+ * the speedstep_get_processor_frequency() call. */
+#define SPEEDSTEP_PROCESSOR_PM			0xFFFFFF03  /* Pentium M  */
+#define SPEEDSTEP_PROCESSOR_P4D			0xFFFFFF04  /* desktop P4  */
+
+/* speedstep states -- only two of them */
+
+#define SPEEDSTEP_HIGH                  0x00000000
+#define SPEEDSTEP_LOW                   0x00000001
+
+
+/* detect a speedstep-capable processor */
+extern unsigned int speedstep_detect_processor (void);
+
+/* detect the current speed (in khz) of the processor */
+extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
+
+
+/* detect the low and high speeds of the processor. The callback 
+ * set_state"'s first argument is either SPEEDSTEP_HIGH or 
+ * SPEEDSTEP_LOW; the second argument is zero so that no 
+ * cpufreq_notify_transition calls are initiated.
+ */
+extern unsigned int speedstep_get_freqs(unsigned int processor,
+	  unsigned int *low_speed,
+	  unsigned int *high_speed,
+	  void (*set_state) (unsigned int state));
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
new file mode 100644
index 00000000000..79440b3f087
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
@@ -0,0 +1,424 @@
+/*
+ * Intel SpeedStep SMI driver.
+ *
+ * (C) 2003  Hiroshi Miura <miura@da-cha.org>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h> 
+#include <linux/moduleparam.h> 
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <asm/ist.h>
+
+#include "speedstep-lib.h"
+
+/* speedstep system management interface port/command.
+ *
+ * These parameters are got from IST-SMI BIOS call.
+ * If user gives it, these are used.
+ * 
+ */
+static int		smi_port	= 0;
+static int		smi_cmd		= 0;
+static unsigned int	smi_sig		= 0;
+
+/* info about the processor */
+static unsigned int	speedstep_processor = 0;
+
+/* 
+ *   There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH, 	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+#define GET_SPEEDSTEP_OWNER 0
+#define GET_SPEEDSTEP_STATE 1
+#define SET_SPEEDSTEP_STATE 2
+#define GET_SPEEDSTEP_FREQS 4
+
+/* how often shall the SMI call be tried if it failed, e.g. because
+ * of DMA activity going on? */
+#define SMI_TRIES 5
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
+
+/**
+ * speedstep_smi_ownership
+ */
+static int speedstep_smi_ownership (void)
+{
+	u32 command, result, magic;
+	u32 function = GET_SPEEDSTEP_OWNER;
+	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+	magic = virt_to_phys(magic_data);
+
+	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__(
+		"out %%al, (%%dx)\n"
+		: "=D" (result)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic)
+	);
+
+	dprintk("result is %x\n", result);
+
+	return result;
+}
+
+/**
+ * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
+ * @low: the low frequency value is placed here
+ * @high: the high frequency value is placed here
+ *
+ * Only available on later SpeedStep-enabled systems, returns false results or
+ * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
+ * shows that the latter occurs if !(ist_info.event & 0xFFFF).
+ */
+static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+{
+	u32 command, result = 0, edi, high_mhz, low_mhz;
+	u32 state=0;
+	u32 function = GET_SPEEDSTEP_FREQS;
+
+	if (!(ist_info.event & 0xFFFF)) {
+		dprintk("bug #1422 -- can't read freqs from BIOS\n", result);
+		return -ENODEV;
+	}
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__("movl $0, %%edi\n"
+		"out %%al, (%%dx)\n"
+		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
+		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+	);
+
+	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
+
+	/* abort if results are obviously incorrect... */
+	if ((high_mhz + low_mhz) < 600)
+		return -EINVAL;
+
+	*high = high_mhz * 1000;
+	*low  = low_mhz  * 1000;
+
+	return result;
+} 
+
+/**
+ * speedstep_get_state - set the SpeedStep state
+ * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static int speedstep_get_state (void)
+{
+	u32 function=GET_SPEEDSTEP_STATE;
+	u32 result, state, edi, command;
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__("movl $0, %%edi\n"
+		"out %%al, (%%dx)\n"
+		: "=a" (result), "=b" (state), "=D" (edi)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+	);
+
+	dprintk("state is %x, result is %x\n", state, result);
+
+	return (state & 1);
+}
+
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static void speedstep_set_state (unsigned int state)
+{
+	unsigned int result = 0, command, new_state;
+	unsigned long flags;
+	unsigned int function=SET_SPEEDSTEP_STATE;
+	unsigned int retry = 0;
+
+	if (state > 0x1)
+		return;
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
+
+	do {
+		if (retry) {
+			dprintk("retry %u, previous result %u, waiting...\n", retry, result);
+			mdelay(retry * 50);
+		}
+		retry++;
+		__asm__ __volatile__(
+			"movl $0, %%edi\n"
+			"out %%al, (%%dx)\n"
+			: "=b" (new_state), "=D" (result)
+			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+			);
+	} while ((new_state != state) && (retry <= SMI_TRIES));
+
+	/* enable IRQs */
+	local_irq_restore(flags);
+
+	if (new_state == state) {
+		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
+	} else {
+		printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+	}
+
+	return;
+}
+
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: new freq
+ * @relation: 
+ *
+ * Sets a new CPUFreq policy/freq.
+ */
+static int speedstep_target (struct cpufreq_policy *policy,
+			unsigned int target_freq, unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = 0; /* speedstep.c is UP only driver */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	speedstep_set_state(newstate);
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result;
+	unsigned int speed,state;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	result = speedstep_smi_ownership();
+	if (result) {
+		dprintk("fails in aquiring ownership of a SMI interface.\n");
+		return -EINVAL;
+	}
+
+	/* detect low and high frequency */
+	result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				&speedstep_freqs[SPEEDSTEP_HIGH].frequency);
+	if (result) {
+		/* fall back to speedstep_lib.c dection mechanism: try both states out */
+		dprintk("could not detect low and high frequencies by SMI call.\n");
+		result = speedstep_get_freqs(speedstep_processor,
+				&speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+				&speedstep_set_state);
+
+		if (result) {
+			dprintk("could not detect two different speeds -- aborting.\n");
+			return result;
+		} else
+			dprintk("workaround worked.\n");
+	}
+
+	/* get current speed setting */
+	state = speedstep_get_state();
+	speed = speedstep_freqs[state].frequency;
+
+	dprintk("currently at %s speed setting - %i MHz\n", 
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return (result);
+
+        cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	if (cpu)
+		return -ENODEV;
+	return speedstep_get_processor_frequency(speedstep_processor);
+}
+
+
+static int speedstep_resume(struct cpufreq_policy *policy)
+{
+	int result = speedstep_smi_ownership();
+
+	if (result)
+		dprintk("fails in re-aquiring ownership of a SMI interface.\n");
+
+	return result;
+}
+
+static struct freq_attr* speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver speedstep_driver = {
+	.name		= "speedstep-smi",
+	.verify 	= speedstep_verify,
+	.target 	= speedstep_target,
+	.init		= speedstep_cpu_init,
+	.exit		= speedstep_cpu_exit,
+	.get		= speedstep_get,
+	.resume		= speedstep_resume,
+	.owner		= THIS_MODULE,
+	.attr		= speedstep_attr,
+};
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * BIOS, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	speedstep_processor = speedstep_detect_processor();
+
+	switch (speedstep_processor) {
+	case SPEEDSTEP_PROCESSOR_PIII_T:
+	case SPEEDSTEP_PROCESSOR_PIII_C:
+	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+		break;
+	default:
+		speedstep_processor = 0;
+	}
+
+	if (!speedstep_processor) {
+		dprintk ("No supported Intel CPU detected.\n");
+		return -ENODEV;
+	}
+
+	dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", 
+		ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
+
+
+	/* Error if no IST-SMI BIOS or no PARM 
+		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
+	if ((ist_info.signature !=  0x47534943) && ( 
+	    (smi_port == 0) || (smi_cmd == 0)))
+		return -ENODEV;
+
+	if (smi_sig == 1)
+		smi_sig = 0x47534943;
+	else
+		smi_sig = ist_info.signature;
+
+	/* setup smi_port from MODLULE_PARM or BIOS */
+	if ((smi_port > 0xff) || (smi_port < 0)) {
+		return -EINVAL;
+	} else if (smi_port == 0) {
+		smi_port = ist_info.command & 0xff;
+	}
+
+	if ((smi_cmd > 0xff) || (smi_cmd < 0)) {
+		return -EINVAL;
+	} else if (smi_cmd == 0) {
+		smi_cmd = (ist_info.command >> 16) & 0xff;
+	}
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+module_param(smi_port,  int, 0444);
+module_param(smi_cmd,   int, 0444);
+module_param(smi_sig,  uint, 0444);
+
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
+
+MODULE_AUTHOR ("Hiroshi Miura");
+MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE ("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
new file mode 100644
index 00000000000..ba4b01138c8
--- /dev/null
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -0,0 +1,439 @@
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/timer.h>
+
+#include "cpu.h"
+
+/*
+ * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
+ */
+static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+	unsigned char ccr2, ccr3;
+	unsigned long flags;
+	
+	/* we test for DEVID by checking whether CCR3 is writable */
+	local_irq_save(flags);
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, ccr3 ^ 0x80);
+	getCx86(0xc0);   /* dummy to change bus */
+
+	if (getCx86(CX86_CCR3) == ccr3) {       /* no DEVID regs. */
+		ccr2 = getCx86(CX86_CCR2);
+		setCx86(CX86_CCR2, ccr2 ^ 0x04);
+		getCx86(0xc0);  /* dummy */
+
+		if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
+			*dir0 = 0xfd;
+		else {                          /* Cx486S A step */
+			setCx86(CX86_CCR2, ccr2);
+			*dir0 = 0xfe;
+		}
+	}
+	else {
+		setCx86(CX86_CCR3, ccr3);  /* restore CCR3 */
+
+		/* read DIR0 and DIR1 CPU registers */
+		*dir0 = getCx86(CX86_DIR0);
+		*dir1 = getCx86(CX86_DIR1);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
+ * order to identify the Cyrix CPU model after we're out of setup.c
+ *
+ * Actually since bugs.h doesn't even reference this perhaps someone should
+ * fix the documentation ???
+ */
+static unsigned char Cx86_dir0_msb __initdata = 0;
+
+static char Cx86_model[][9] __initdata = {
+	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
+	"M II ", "Unknown"
+};
+static char Cx486_name[][5] __initdata = {
+	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
+	"SRx2", "DRx2"
+};
+static char Cx486S_name[][4] __initdata = {
+	"S", "S2", "Se", "S2e"
+};
+static char Cx486D_name[][4] __initdata = {
+	"DX", "DX2", "?", "?", "?", "DX4"
+};
+static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
+static char cyrix_model_mult1[] __initdata = "12??43";
+static char cyrix_model_mult2[] __initdata = "12233445";
+
+/*
+ * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
+ * BIOSes for compatibility with DOS games.  This makes the udelay loop
+ * work correctly, and improves performance.
+ *
+ * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
+ */
+
+extern void calibrate_delay(void) __init;
+
+static void __init check_cx686_slop(struct cpuinfo_x86 *c)
+{
+	unsigned long flags;
+	
+	if (Cx86_dir0_msb == 3) {
+		unsigned char ccr3, ccr5;
+
+		local_irq_save(flags);
+		ccr3 = getCx86(CX86_CCR3);
+		setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+		ccr5 = getCx86(CX86_CCR5);
+		if (ccr5 & 2)
+			setCx86(CX86_CCR5, ccr5 & 0xfd);  /* reset SLOP */
+		setCx86(CX86_CCR3, ccr3);                 /* disable MAPEN */
+		local_irq_restore(flags);
+
+		if (ccr5 & 2) { /* possible wrong calibration done */
+			printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+			calibrate_delay();
+			c->loops_per_jiffy = loops_per_jiffy;
+		}
+	}
+}
+
+
+static void __init set_cx86_reorder(void)
+{
+	u8 ccr3;
+
+	printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN� */
+
+	/* Load/Store Serialize to mem access disable (=reorder it)� */
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+	/* set load/store serialize from 1GB to 4GB */
+	ccr3 |= 0xe0;
+	setCx86(CX86_CCR3, ccr3);
+}
+
+static void __init set_cx86_memwb(void)
+{
+	u32 cr0;
+
+	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+
+	/* CCR2 bit 2: unlock NW bit */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+	/* set 'Not Write-through' */
+	cr0 = 0x20000000;
+	__asm__("movl %%cr0,%%eax\n\t"
+		"orl %0,%%eax\n\t"
+		"movl %%eax,%%cr0\n"
+		: : "r" (cr0)
+		:"ax");
+	/* CCR2 bit 2: lock NW bit and set WT1 */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+}
+
+static void __init set_cx86_inc(void)
+{
+	unsigned char ccr3;
+
+	printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN� */
+	/* PCR1 -- Performance Control */
+	/* Incrementor on, whatever that is */
+	setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
+	/* PCR0 -- Performance Control */
+	/* Incrementor Margin 10 */
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); 
+	setCx86(CX86_CCR3, ccr3);	/* disable MAPEN */
+}
+
+/*
+ *	Configure later MediaGX and/or Geode processor.
+ */
+
+static void __init geode_configure(void)
+{
+	unsigned long flags;
+	u8 ccr3, ccr4;
+	local_irq_save(flags);
+
+	/* Suspend on halt power saving and enable #SUSP pin */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* Enable */
+	
+	ccr4 = getCx86(CX86_CCR4);
+	ccr4 |= 0x38;		/* FPU fast, DTE cache, Mem bypass */
+	
+	setCx86(CX86_CCR3, ccr3);
+	
+	set_cx86_memwb();
+	set_cx86_reorder();	
+	set_cx86_inc();
+	
+	local_irq_restore(flags);
+}
+
+
+#ifdef CONFIG_PCI
+static struct pci_device_id cyrix_55x0[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
+	{ },
+};
+#endif
+
+static void __init init_cyrix(struct cpuinfo_x86 *c)
+{
+	unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
+	char *buf = c->x86_model_id;
+	const char *p = NULL;
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+
+	/* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
+	if ( test_bit(1*32+24, c->x86_capability) ) {
+		clear_bit(1*32+24, c->x86_capability);
+		set_bit(X86_FEATURE_CXMMX, c->x86_capability);
+	}
+
+	do_cyrix_devid(&dir0, &dir1);
+
+	check_cx686_slop(c);
+
+	Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family"   */
+	dir0_lsn = dir0 & 0xf;                /* model or clock multiplier */
+
+	/* common case step number/rev -- exceptions handled below */
+	c->x86_model = (dir1 >> 4) + 1;
+	c->x86_mask = dir1 & 0xf;
+
+	/* Now cook; the original recipe is by Channing Corn, from Cyrix.
+	 * We do the same thing for each generation: we work out
+	 * the model, multiplier and stepping.  Black magic included,
+	 * to make the silicon step/rev numbers match the printed ones.
+	 */
+	 
+	switch (dir0_msn) {
+		unsigned char tmp;
+
+	case 0: /* Cx486SLC/DLC/SRx/DRx */
+		p = Cx486_name[dir0_lsn & 7];
+		break;
+
+	case 1: /* Cx486S/DX/DX2/DX4 */
+		p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
+			: Cx486S_name[dir0_lsn & 3];
+		break;
+
+	case 2: /* 5x86 */
+		Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+		p = Cx86_cb+2;
+		break;
+
+	case 3: /* 6x86/6x86L */
+		Cx86_cb[1] = ' ';
+		Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+		if (dir1 > 0x21) { /* 686L */
+			Cx86_cb[0] = 'L';
+			p = Cx86_cb;
+			(c->x86_model)++;
+		} else             /* 686 */
+			p = Cx86_cb+1;
+		/* Emulate MTRRs using Cyrix's ARRs. */
+		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		/* 6x86's contain this bug */
+		c->coma_bug = 1;
+		break;
+
+	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+#ifdef CONFIG_PCI
+		/* It isn't really a PCI quirk directly, but the cure is the
+		   same. The MediaGX has deep magic SMM stuff that handles the
+		   SB emulation. It thows away the fifo on disable_dma() which
+		   is wrong and ruins the audio. 
+
+		   Bug2: VSA1 has a wrap bug so that using maximum sized DMA 
+		   causes bad things. According to NatSemi VSA2 has another
+		   bug to do with 'hlt'. I've not seen any boards using VSA2
+		   and X doesn't seem to support it either so who cares 8).
+		   VSA1 we work around however.
+		*/
+
+		printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+		isa_dma_bridge_buggy = 2;
+#endif		
+		c->x86_cache_size=16;	/* Yep 16K integrated cache thats it */
+ 
+		/*
+		 *  The 5510/5520 companion chips have a funky PIT.
+		 */  
+		if (pci_dev_present(cyrix_55x0))
+			pit_latch_buggy = 1;
+
+		/* GXm supports extended cpuid levels 'ala' AMD */
+		if (c->cpuid_level == 2) {
+			/* Enable cxMMX extensions (GX1 Datasheet 54) */
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+			
+			/* GXlv/GXm/GX1 */
+			if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63)
+				geode_configure();
+			get_model_name(c);  /* get CPU marketing name */
+			return;
+		}
+		else {  /* MediaGX */
+			Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
+			p = Cx86_cb+2;
+			c->x86_model = (dir1 & 0x20) ? 1 : 2;
+		}
+		break;
+
+        case 5: /* 6x86MX/M II */
+		if (dir1 > 7)
+		{
+			dir0_msn++;  /* M II */
+			/* Enable MMX extensions (App note 108) */
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+		}
+		else
+		{
+			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
+		}
+		tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
+		Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
+		p = Cx86_cb+tmp;
+        	if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
+			(c->x86_model)++;
+		/* Emulate MTRRs using Cyrix's ARRs. */
+		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		break;
+
+	case 0xf:  /* Cyrix 486 without DEVID registers */
+		switch (dir0_lsn) {
+		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
+			dir0_msn = 0;
+			p = Cx486_name[(c->hard_math) ? 1 : 0];
+			break;
+
+		case 0xe:  /* a 486S A step */
+			dir0_msn = 0;
+			p = Cx486S_name[0];
+			break;
+		}
+		break;
+
+	default:  /* unknown (shouldn't happen, we know everyone ;-) */
+		dir0_msn = 7;
+		break;
+	}
+	strcpy(buf, Cx86_model[dir0_msn & 7]);
+	if (p) strcat(buf, p);
+	return;
+}
+
+/*
+ * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
+ * by the fact that they preserve the flags across the division of 5/2.
+ * PII and PPro exhibit this behavior too, but they have cpuid available.
+ */
+ 
+/*
+ * Perform the Cyrix 5/2 test. A Cyrix won't change
+ * the flags, while other 486 chips will.
+ */
+static inline int test_cyrix_52div(void)
+{
+	unsigned int test;
+
+	__asm__ __volatile__(
+	     "sahf\n\t"		/* clear flags (%eax = 0x0005) */
+	     "div %b2\n\t"	/* divide 5 by 2 */
+	     "lahf"		/* store flags into %ah */
+	     : "=a" (test)
+	     : "0" (5), "q" (2)
+	     : "cc");
+
+	/* AH is 0x02 on Cyrix after the divide.. */
+	return (unsigned char) (test >> 8) == 0x02;
+}
+
+static void cyrix_identify(struct cpuinfo_x86 * c)
+{
+	/* Detect Cyrix with disabled CPUID */
+	if ( c->x86 == 4 && test_cyrix_52div() ) {
+		unsigned char dir0, dir1;
+		
+		strcpy(c->x86_vendor_id, "CyrixInstead");
+	        c->x86_vendor = X86_VENDOR_CYRIX;
+	        
+	        /* Actually enable cpuid on the older cyrix */
+	    
+	    	/* Retrieve CPU revisions */
+	    	
+		do_cyrix_devid(&dir0, &dir1);
+
+		dir0>>=4;		
+		
+		/* Check it is an affected model */
+		
+   	        if (dir0 == 5 || dir0 == 3)
+   	        {
+			unsigned char ccr3, ccr4;
+			unsigned long flags;
+			printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+			local_irq_save(flags);
+			ccr3 = getCx86(CX86_CCR3);
+			setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+			ccr4 = getCx86(CX86_CCR4);
+			setCx86(CX86_CCR4, ccr4 | 0x80);          /* enable cpuid  */
+			setCx86(CX86_CCR3, ccr3);                 /* disable MAPEN */
+			local_irq_restore(flags);
+		}
+	}
+	generic_identify(c);
+}
+
+static struct cpu_dev cyrix_cpu_dev __initdata = {
+	.c_vendor	= "Cyrix",
+	.c_ident 	= { "CyrixInstead" },
+	.c_init		= init_cyrix,
+	.c_identify	= cyrix_identify,
+};
+
+int __init cyrix_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(cyrix_init_cpu);
+
+static struct cpu_dev nsc_cpu_dev __initdata = {
+	.c_vendor	= "NSC",
+	.c_ident 	= { "Geode by NSC" },
+	.c_init		= init_cyrix,
+	.c_identify	= generic_identify,
+};
+
+int __init nsc_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(nsc_init_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
new file mode 100644
index 00000000000..b8d847b850d
--- /dev/null
+++ b/arch/i386/kernel/cpu/intel.c
@@ -0,0 +1,248 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+
+#include "cpu.h"
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#endif
+
+extern int trap_init_f00f_bug(void);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Alignment at which movsl is preferred for bulk memory copies.
+ */
+struct movsl_mask movsl_mask;
+#endif
+
+void __init early_intel_workaround(struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return;
+	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+	if (c->x86 == 15 && c->x86_cache_alignment == 64)
+		c->x86_cache_alignment = 128;
+}
+
+/*
+ *	Early probe support logic for ppro memory erratum #50
+ *
+ *	This is called before we do cpu ident work
+ */
+ 
+int __init ppro_with_ram_bug(void)
+{
+	/* Uses data from early_cpu_detect now */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6 &&
+	    boot_cpu_data.x86_model == 1 &&
+	    boot_cpu_data.x86_mask < 8) {
+		printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+		return 1;
+	}
+	return 0;
+}
+	
+
+/*
+ * P4 Xeon errata 037 workaround.
+ * Hardware prefetcher may cause stale data to be loaded into the cache.
+ */
+static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+{
+	unsigned long lo, hi;
+
+	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+		rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+		if ((lo & (1<<9)) == 0) {
+			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
+			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
+			lo |= (1<<9);	/* Disable hw prefetching */
+			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+		}
+	}
+}
+
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+	unsigned int l2 = 0;
+	char *p = NULL;
+
+#ifdef CONFIG_X86_F00F_BUG
+	/*
+	 * All current models of Pentium and Pentium with MMX technology CPUs
+	 * have the F0 0F bug, which lets nonprivileged users lock up the system.
+	 * Note that the workaround only should be initialized once...
+	 */
+	c->f00f_bug = 0;
+	if ( c->x86 == 5 ) {
+		static int f00f_workaround_enabled = 0;
+
+		c->f00f_bug = 1;
+		if ( !f00f_workaround_enabled ) {
+			trap_init_f00f_bug();
+			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+			f00f_workaround_enabled = 1;
+		}
+	}
+#endif
+
+	select_idle_routine(c);
+	l2 = init_intel_cacheinfo(c);
+
+	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
+	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+	/* Names for the Pentium II/Celeron processors 
+	   detectable only by also checking the cache size.
+	   Dixon is NOT a Celeron. */
+	if (c->x86 == 6) {
+		switch (c->x86_model) {
+		case 5:
+			if (c->x86_mask == 0) {
+				if (l2 == 0)
+					p = "Celeron (Covington)";
+				else if (l2 == 256)
+					p = "Mobile Pentium II (Dixon)";
+			}
+			break;
+			
+		case 6:
+			if (l2 == 128)
+				p = "Celeron (Mendocino)";
+			else if (c->x86_mask == 0 || c->x86_mask == 5)
+				p = "Celeron-A";
+			break;
+			
+		case 8:
+			if (l2 == 128)
+				p = "Celeron (Coppermine)";
+			break;
+		}
+	}
+
+	if ( p )
+		strcpy(c->x86_model_id, p);
+	
+	detect_ht(c);
+
+	/* Work around errata */
+	Intel_errata_workarounds(c);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	/*
+	 * Set up the preferred alignment for movsl bulk memory moves
+	 */
+	switch (c->x86) {
+	case 4:		/* 486: untested */
+		break;
+	case 5:		/* Old Pentia: untested */
+		break;
+	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	case 15:	/* P4 is OK down to 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	}
+#endif
+
+	if (c->x86 == 15) 
+		set_bit(X86_FEATURE_P4, c->x86_capability);
+	if (c->x86 == 6) 
+		set_bit(X86_FEATURE_P3, c->x86_capability);
+}
+
+
+static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* Intel PIII Tualatin. This comes in two flavours.
+	 * One has 256kb of cache, the other 512. We have no way
+	 * to determine which, so we use a boottime override
+	 * for the 512kb model, and assume 256 otherwise.
+	 */
+	if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
+		size = 256;
+	return size;
+}
+
+static struct cpu_dev intel_cpu_dev __initdata = {
+	.c_vendor	= "Intel",
+	.c_ident 	= { "GenuineIntel" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 
+		  { 
+			  [0] = "486 DX-25/33", 
+			  [1] = "486 DX-50", 
+			  [2] = "486 SX", 
+			  [3] = "486 DX/2", 
+			  [4] = "486 SL", 
+			  [5] = "486 SX/2", 
+			  [7] = "486 DX/2-WB", 
+			  [8] = "486 DX/4", 
+			  [9] = "486 DX/4-WB"
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
+		  { 
+			  [0] = "Pentium 60/66 A-step", 
+			  [1] = "Pentium 60/66", 
+			  [2] = "Pentium 75 - 200",
+			  [3] = "OverDrive PODP5V83", 
+			  [4] = "Pentium MMX",
+			  [7] = "Mobile Pentium 75 - 200", 
+			  [8] = "Mobile Pentium MMX"
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
+		  { 
+			  [0] = "Pentium Pro A-step",
+			  [1] = "Pentium Pro", 
+			  [3] = "Pentium II (Klamath)", 
+			  [4] = "Pentium II (Deschutes)", 
+			  [5] = "Pentium II (Deschutes)", 
+			  [6] = "Mobile Pentium II",
+			  [7] = "Pentium III (Katmai)", 
+			  [8] = "Pentium III (Coppermine)", 
+			  [10] = "Pentium III (Cascades)",
+			  [11] = "Pentium III (Tualatin)",
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
+		  {
+			  [0] = "Pentium 4 (Unknown)",
+			  [1] = "Pentium 4 (Willamette)",
+			  [2] = "Pentium 4 (Northwood)",
+			  [4] = "Pentium 4 (Foster)",
+			  [5] = "Pentium 4 (Foster)",
+		  }
+		},
+	},
+	.c_init		= init_intel,
+	.c_identify	= generic_identify,
+	.c_size_cache	= intel_size_cache,
+};
+
+__init int intel_cpu_init(void)
+{
+	cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
+	return 0;
+}
+
+// arch_initcall(intel_cpu_init);
+
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
new file mode 100644
index 00000000000..aeb5b4ef8c8
--- /dev/null
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -0,0 +1,598 @@
+/*
+ *      Routines to indentify caches on Intel CPU.
+ *
+ *      Changes:
+ *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+#define LVL_1_INST	1
+#define LVL_1_DATA	2
+#define LVL_2		3
+#define LVL_3		4
+#define LVL_TRACE	5
+
+struct _cache_table
+{
+	unsigned char descriptor;
+	char cache_type;
+	short size;
+};
+
+/* all the cache descriptor types we care about (no TLB or trace cache entries) */
+static struct _cache_table cache_table[] __initdata =
+{
+	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
+	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
+	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
+	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
+	{ 0x78, LVL_2,    1024 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x79, LVL_2,     128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7a, LVL_2,     256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7b, LVL_2,     512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7c, LVL_2,    1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7d, LVL_2,    2048 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x7f, LVL_2,     512 },	/* 2-way set assoc, 64 byte line size */
+	{ 0x82, LVL_2,     256 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x83, LVL_2,     512 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x84, LVL_2,    1024 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x85, LVL_2,    2048 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x86, LVL_2,     512 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x87, LVL_2,    1024 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x00, 0, 0}
+};
+
+
+enum _cache_type
+{
+	CACHE_TYPE_NULL	= 0,
+	CACHE_TYPE_DATA = 1,
+	CACHE_TYPE_INST = 2,
+	CACHE_TYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+	struct {
+		enum _cache_type	type:5;
+		unsigned int		level:3;
+		unsigned int		is_self_initializing:1;
+		unsigned int		is_fully_associative:1;
+		unsigned int		reserved:4;
+		unsigned int		num_threads_sharing:12;
+		unsigned int		num_cores_on_die:6;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+	struct {
+		unsigned int		coherency_line_size:12;
+		unsigned int		physical_line_partition:10;
+		unsigned int		ways_of_associativity:10;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+	struct {
+		unsigned int		number_of_sets:32;
+	} split;
+	u32 full;
+};
+
+struct _cpuid4_info {
+	union _cpuid4_leaf_eax eax;
+	union _cpuid4_leaf_ebx ebx;
+	union _cpuid4_leaf_ecx ecx;
+	unsigned long size;
+	cpumask_t shared_cpu_map;
+};
+
+#define MAX_CACHE_LEAVES		4
+static unsigned short __devinitdata	num_cache_leaves;
+
+static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+	unsigned int		eax, ebx, ecx, edx;
+	union _cpuid4_leaf_eax	cache_eax;
+
+	cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
+	cache_eax.full = eax;
+	if (cache_eax.split.type == CACHE_TYPE_NULL)
+		return -1;
+
+	this_leaf->eax.full = eax;
+	this_leaf->ebx.full = ebx;
+	this_leaf->ecx.full = ecx;
+	this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
+		(this_leaf->ebx.split.coherency_line_size + 1) *
+		(this_leaf->ebx.split.physical_line_partition + 1) *
+		(this_leaf->ebx.split.ways_of_associativity + 1);
+	return 0;
+}
+
+static int __init find_num_cache_leaves(void)
+{
+	unsigned int		eax, ebx, ecx, edx;
+	union _cpuid4_leaf_eax	cache_eax;
+	int 			i;
+	int 			retval;
+
+	retval = MAX_CACHE_LEAVES;
+	/* Do cpuid(4) loop to find out num_cache_leaves */
+	for (i = 0; i < MAX_CACHE_LEAVES; i++) {
+		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+		cache_eax.full = eax;
+		if (cache_eax.split.type == CACHE_TYPE_NULL) {
+			retval = i;
+			break;
+		}
+	}
+	return retval;
+}
+
+unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+
+	if (c->cpuid_level > 4) {
+		static int is_initialized;
+
+		if (is_initialized == 0) {
+			/* Init num_cache_leaves from boot CPU */
+			num_cache_leaves = find_num_cache_leaves();
+			is_initialized++;
+		}
+
+		/*
+		 * Whenever possible use cpuid(4), deterministic cache
+		 * parameters cpuid leaf to find the cache details
+		 */
+		for (i = 0; i < num_cache_leaves; i++) {
+			struct _cpuid4_info this_leaf;
+
+			int retval;
+
+			retval = cpuid4_cache_lookup(i, &this_leaf);
+			if (retval >= 0) {
+				switch(this_leaf.eax.split.level) {
+				    case 1:
+					if (this_leaf.eax.split.type ==
+							CACHE_TYPE_DATA)
+						new_l1d = this_leaf.size/1024;
+					else if (this_leaf.eax.split.type ==
+							CACHE_TYPE_INST)
+						new_l1i = this_leaf.size/1024;
+					break;
+				    case 2:
+					new_l2 = this_leaf.size/1024;
+					break;
+				    case 3:
+					new_l3 = this_leaf.size/1024;
+					break;
+				    default:
+					break;
+				}
+			}
+		}
+	}
+	if (c->cpuid_level > 1) {
+		/* supports eax=2  call */
+		int i, j, n;
+		int regs[4];
+		unsigned char *dp = (unsigned char *)regs;
+
+		/* Number of times to iterate */
+		n = cpuid_eax(2) & 0xFF;
+
+		for ( i = 0 ; i < n ; i++ ) {
+			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+			/* If bit 31 is set, this is an unknown format */
+			for ( j = 0 ; j < 3 ; j++ ) {
+				if ( regs[j] < 0 ) regs[j] = 0;
+			}
+
+			/* Byte 0 is level count, not a descriptor */
+			for ( j = 1 ; j < 16 ; j++ ) {
+				unsigned char des = dp[j];
+				unsigned char k = 0;
+
+				/* look up this descriptor in the table */
+				while (cache_table[k].descriptor != 0)
+				{
+					if (cache_table[k].descriptor == des) {
+						switch (cache_table[k].cache_type) {
+						case LVL_1_INST:
+							l1i += cache_table[k].size;
+							break;
+						case LVL_1_DATA:
+							l1d += cache_table[k].size;
+							break;
+						case LVL_2:
+							l2 += cache_table[k].size;
+							break;
+						case LVL_3:
+							l3 += cache_table[k].size;
+							break;
+						case LVL_TRACE:
+							trace += cache_table[k].size;
+							break;
+						}
+
+						break;
+					}
+
+					k++;
+				}
+			}
+		}
+
+		if (new_l1d)
+			l1d = new_l1d;
+
+		if (new_l1i)
+			l1i = new_l1i;
+
+		if (new_l2)
+			l2 = new_l2;
+
+		if (new_l3)
+			l3 = new_l3;
+
+		if ( trace )
+			printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+		else if ( l1i )
+			printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+		if ( l1d )
+			printk(", L1 D cache: %dK\n", l1d);
+		else
+			printk("\n");
+		if ( l2 )
+			printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+		if ( l3 )
+			printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+		/*
+		 * This assumes the L3 cache is shared; it typically lives in
+		 * the northbridge.  The L1 caches are included by the L2
+		 * cache, and so should not be included for the purpose of
+		 * SMP switching weights.
+		 */
+		c->x86_cache_size = l2 ? l2 : (l1i+l1d);
+	}
+
+	return l2;
+}
+
+/* pointer to _cpuid4_info array (for each cache leaf) */
+static struct _cpuid4_info *cpuid4_info[NR_CPUS];
+#define CPUID4_INFO_IDX(x,y)    (&((cpuid4_info[x])[y]))
+
+#ifdef CONFIG_SMP
+static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+	struct _cpuid4_info	*this_leaf;
+	unsigned long num_threads_sharing;
+
+	this_leaf = CPUID4_INFO_IDX(cpu, index);
+	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
+
+	if (num_threads_sharing == 1)
+		cpu_set(cpu, this_leaf->shared_cpu_map);
+#ifdef CONFIG_X86_HT
+	else if (num_threads_sharing == smp_num_siblings)
+		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
+#endif
+	else
+		printk(KERN_INFO "Number of CPUs sharing cache didn't match "
+				"any known set of CPUs\n");
+}
+#else
+static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+#endif
+
+static void free_cache_attributes(unsigned int cpu)
+{
+	kfree(cpuid4_info[cpu]);
+	cpuid4_info[cpu] = NULL;
+}
+
+static int __devinit detect_cache_attributes(unsigned int cpu)
+{
+	struct _cpuid4_info	*this_leaf;
+	unsigned long 		j;
+	int 			retval;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	cpuid4_info[cpu] = kmalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (unlikely(cpuid4_info[cpu] == NULL))
+		return -ENOMEM;
+	memset(cpuid4_info[cpu], 0,
+	    sizeof(struct _cpuid4_info) * num_cache_leaves);
+
+	/* Do cpuid and store the results */
+	for (j = 0; j < num_cache_leaves; j++) {
+		this_leaf = CPUID4_INFO_IDX(cpu, j);
+		retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(retval < 0))
+			goto err_out;
+		cache_shared_cpu_map_setup(cpu, j);
+	}
+	return 0;
+
+err_out:
+	free_cache_attributes(cpu);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_SYSFS
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+
+/* pointer to kobject for cpuX/cache */
+static struct kobject * cache_kobject[NR_CPUS];
+
+struct _index_kobject {
+	struct kobject kobj;
+	unsigned int cpu;
+	unsigned short index;
+};
+
+/* pointer to array of kobjects for cpuX/cache/indexY */
+static struct _index_kobject *index_kobject[NR_CPUS];
+#define INDEX_KOBJECT_PTR(x,y)    (&((index_kobject[x])[y]))
+
+#define show_one_plus(file_name, object, val)				\
+static ssize_t show_##file_name						\
+			(struct _cpuid4_info *this_leaf, char *buf)	\
+{									\
+	return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
+}
+
+show_one_plus(level, eax.split.level, 0);
+show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
+show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
+show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
+show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
+
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+{
+	return sprintf (buf, "%luK\n", this_leaf->size / 1024);
+}
+
+static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
+{
+	char mask_str[NR_CPUS];
+	cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
+	return sprintf(buf, "%s\n", mask_str);
+}
+
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
+	switch(this_leaf->eax.split.type) {
+	    case CACHE_TYPE_DATA:
+		return sprintf(buf, "Data\n");
+		break;
+	    case CACHE_TYPE_INST:
+		return sprintf(buf, "Instruction\n");
+		break;
+	    case CACHE_TYPE_UNIFIED:
+		return sprintf(buf, "Unified\n");
+		break;
+	    default:
+		return sprintf(buf, "Unknown\n");
+		break;
+	}
+}
+
+struct _cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct _cpuid4_info *, char *);
+	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#define define_one_ro(_name) \
+static struct _cache_attr _name = \
+	__ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(physical_line_partition);
+define_one_ro(ways_of_associativity);
+define_one_ro(number_of_sets);
+define_one_ro(size);
+define_one_ro(shared_cpu_map);
+
+static struct attribute * default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&physical_line_partition.attr,
+	&ways_of_associativity.attr,
+	&number_of_sets.attr,
+	&size.attr,
+	&shared_cpu_map.attr,
+	NULL
+};
+
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+	struct _cache_attr *fattr = to_attr(attr);
+	struct _index_kobject *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->show ?
+		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+			buf) :
+	       	0;
+	return ret;
+}
+
+static ssize_t store(struct kobject * kobj, struct attribute * attr,
+		     const char * buf, size_t count)
+{
+	return 0;
+}
+
+static struct sysfs_ops sysfs_ops = {
+	.show   = show,
+	.store  = store,
+};
+
+static struct kobj_type ktype_cache = {
+	.sysfs_ops	= &sysfs_ops,
+	.default_attrs	= default_attrs,
+};
+
+static struct kobj_type ktype_percpu_entry = {
+	.sysfs_ops	= &sysfs_ops,
+};
+
+static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+{
+	kfree(cache_kobject[cpu]);
+	kfree(index_kobject[cpu]);
+	cache_kobject[cpu] = NULL;
+	index_kobject[cpu] = NULL;
+	free_cache_attributes(cpu);
+}
+
+static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu)
+{
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	detect_cache_attributes(cpu);
+	if (cpuid4_info[cpu] == NULL)
+		return -ENOENT;
+
+	/* Allocate all required memory */
+	cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (unlikely(cache_kobject[cpu] == NULL))
+		goto err_out;
+	memset(cache_kobject[cpu], 0, sizeof(struct kobject));
+
+	index_kobject[cpu] = kmalloc(
+	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
+	if (unlikely(index_kobject[cpu] == NULL))
+		goto err_out;
+	memset(index_kobject[cpu], 0,
+	    sizeof(struct _index_kobject) * num_cache_leaves);
+
+	return 0;
+
+err_out:
+	cpuid4_cache_sysfs_exit(cpu);
+	return -ENOMEM;
+}
+
+/* Add/Remove cache interface for CPU device */
+static int __devinit cache_add_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i, j;
+	struct _index_kobject *this_object;
+	int retval = 0;
+
+	retval = cpuid4_cache_sysfs_init(cpu);
+	if (unlikely(retval < 0))
+		return retval;
+
+	cache_kobject[cpu]->parent = &sys_dev->kobj;
+	kobject_set_name(cache_kobject[cpu], "%s", "cache");
+	cache_kobject[cpu]->ktype = &ktype_percpu_entry;
+	retval = kobject_register(cache_kobject[cpu]);
+
+	for (i = 0; i < num_cache_leaves; i++) {
+		this_object = INDEX_KOBJECT_PTR(cpu,i);
+		this_object->cpu = cpu;
+		this_object->index = i;
+		this_object->kobj.parent = cache_kobject[cpu];
+		kobject_set_name(&(this_object->kobj), "index%1lu", i);
+		this_object->kobj.ktype = &ktype_cache;
+		retval = kobject_register(&(this_object->kobj));
+		if (unlikely(retval)) {
+			for (j = 0; j < i; j++) {
+				kobject_unregister(
+					&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
+			}
+			kobject_unregister(cache_kobject[cpu]);
+			cpuid4_cache_sysfs_exit(cpu);
+			break;
+		}
+	}
+	return retval;
+}
+
+static int __devexit cache_remove_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i;
+
+	for (i = 0; i < num_cache_leaves; i++)
+		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+	kobject_unregister(cache_kobject[cpu]);
+	cpuid4_cache_sysfs_exit(cpu);
+	return 0;
+}
+
+static struct sysdev_driver cache_sysdev_driver = {
+	.add = cache_add_dev,
+	.remove = __devexit_p(cache_remove_dev),
+};
+
+/* Register/Unregister the cpu_cache driver */
+static int __devinit cache_register_driver(void)
+{
+	if (num_cache_leaves == 0)
+		return 0;
+
+	return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver);
+}
+
+device_initcall(cache_register_driver);
+
+#endif
+
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
new file mode 100644
index 00000000000..30808f3d671
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/Makefile
@@ -0,0 +1,2 @@
+obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o
+obj-$(CONFIG_X86_MCE_NONFATAL)	+=	non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
new file mode 100644
index 00000000000..8df52e86c4d
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -0,0 +1,97 @@
+/*
+ * Athlon/Hammer specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For AMD Athlon/Duron */
+static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	for (i=1; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high&(1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+			/* Clear it */
+			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+
+	if (recover&2)
+		panic ("CPU context corrupt");
+	if (recover&1)
+		panic ("Unable to continue");
+	printk (KERN_EMERG "Attempting to continue.\n");
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+/* AMD K7 machine check is Intel like */
+void __init amd_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+
+	machine_check_vector = k7_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	/* Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some Athlons cause spurious MCEs when its enabled. */
+	wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+	for (i=1; i<nr_mce_banks; i++) {
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	}
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+}
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
new file mode 100644
index 00000000000..bf6d1aefafc
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,77 @@
+/*
+ * mce.c - x86 Machine Check Exception Reporting
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+
+#include "mce.h"
+
+int mce_disabled __initdata = 0;
+int nr_mce_banks;
+
+EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
+
+/* Handle unconfigured int18 (should never happen) */
+static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
+{	
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+
+/* This has to be run for each processor */
+void __init mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (mce_disabled==1)
+		return;
+
+	switch (c->x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (c->x86==6 || c->x86==15)
+				amd_mcheck_init(c);
+			break;
+
+		case X86_VENDOR_INTEL:
+			if (c->x86==5)
+				intel_p5_mcheck_init(c);
+			if (c->x86==6)
+				intel_p6_mcheck_init(c);
+			if (c->x86==15)
+				intel_p4_mcheck_init(c);
+			break;
+
+		case X86_VENDOR_CENTAUR:
+			if (c->x86==5)
+				winchip_mcheck_init(c);
+			break;
+
+		default:
+			break;
+	}
+}
+
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 0;
+}
+
+static int __init mcheck_enable(char *str)
+{
+	mce_disabled = -1;
+	return 0;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h
new file mode 100644
index 00000000000..dc2416dfef1
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/mce.h
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+
+/* Call the installed machine check handler for this CPU setup. */
+extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
+
+extern int mce_disabled __initdata;
+extern int nr_mce_banks;
+
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
new file mode 100644
index 00000000000..7864ddfccf0
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -0,0 +1,93 @@
+/*
+ * Non Fatal Machine Check Exception Reporting
+ *
+ * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ *
+ * This file contains routines to check for non-fatal MCEs every 15s
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/config.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+static int firstbank;
+
+#define MCE_RATE	15*HZ	/* timer rate is 15s */
+
+static void mce_checkregs (void *info)
+{
+	u32 low, high;
+	int i;
+
+	for (i=firstbank; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
+
+		if (high & (1<<31)) {
+			printk(KERN_INFO "MCE: The hardware reports a non "
+				"fatal, correctable incident occurred on "
+				"CPU %d.\n",
+				smp_processor_id());
+			printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+			/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
+			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+}
+
+static void mce_work_fn(void *data);
+static DECLARE_WORK(mce_work, mce_work_fn, NULL);
+
+static void mce_work_fn(void *data)
+{ 
+	on_each_cpu(mce_checkregs, NULL, 1, 1);
+	schedule_delayed_work(&mce_work, MCE_RATE);
+} 
+
+static int __init init_nonfatal_mce_checker(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	/* Check for MCE support */
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return -ENODEV;
+
+	/* Check for PPro style MCA */
+	if (!cpu_has(c, X86_FEATURE_MCA))
+		return -ENODEV;
+
+	/* Some Athlons misbehave when we frob bank 0 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		boot_cpu_data.x86 == 6)
+			firstbank = 1;
+	else
+			firstbank = 0;
+
+	/*
+	 * Check for non-fatal errors every MCE_RATE s
+	 */
+	schedule_delayed_work(&mce_work, MCE_RATE);
+	printk(KERN_INFO "Machine check exception polling timer started.\n");
+	return 0;
+}
+module_init(init_nonfatal_mce_checker);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
new file mode 100644
index 00000000000..8b16ceb929b
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -0,0 +1,271 @@
+/*
+ * P4 specific Machine Check Exception Reporting
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include "mce.h"
+
+/* as supported by the P4/Xeon family */
+struct intel_mce_extended_msrs {
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u32 esi;
+	u32 edi;
+	u32 ebp;
+	u32 esp;
+	u32 eflags;
+	u32 eip;
+	/* u32 *reserved[]; */
+};
+
+static int mce_num_extended_msrs = 0;
+
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+static void unexpected_thermal_interrupt(struct pt_regs *regs)
+{	
+	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+			smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct pt_regs *regs)
+{
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+	static unsigned long next[NR_CPUS];
+
+	ack_APIC_irq();
+
+	if (time_after(next[cpu], jiffies))
+		return;
+
+	next[cpu] = jiffies + HZ*5;
+	rdmsr(MSR_IA32_THERM_STATUS, l, h);
+	if (l & 0x1) {
+		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+				cpu);
+		add_taint(TAINT_MACHINE_CHECK);
+	} else {
+		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+	}
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	vendor_thermal_interrupt(regs);
+	irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	/* Thermal monitoring */
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;	/* -ENODEV */
+
+	/* Clock modulation */
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;	/* -ENODEV */
+
+	/* first check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already -zwanem.
+	 */
+	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+				cpu);
+		return; /* -EBUSY */
+	}
+
+	/* check whether a vector already exists, temporarily masked? */	
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+				"installed\n",
+			cpu, (h & APIC_VECTOR_MASK));
+		return; /* -EBUSY */
+	}
+
+	/* The temperature transition interrupt handler setup */
+	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
+	apic_write_around(APIC_LVTTHMR, h);
+
+	rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+	/* ok we're good to go... */
+	vendor_thermal_interrupt = intel_thermal_interrupt;
+	
+	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+	
+	l = apic_read (APIC_LVTTHMR);
+	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+	return;
+}
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+
+
+/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
+static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+{
+	u32 h;
+
+	if (mce_num_extended_msrs == 0)
+		goto done;
+
+	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
+	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
+	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
+	rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
+	rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
+	rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
+	rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
+	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
+	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
+	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
+
+	/* can we rely on kmalloc to do a dynamic
+	 * allocation for the reserved registers?
+	 */
+done:
+	return mce_num_extended_msrs;
+}
+
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+	struct intel_mce_extended_msrs dbg;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	if (intel_get_extended_msrs(&dbg)) {
+		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
+			smp_processor_id(), dbg.eip, dbg.eflags);
+		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
+			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
+		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
+	}
+
+	for (i=0; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high & (1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+		}
+	}
+
+	if (recover & 2)
+		panic ("CPU context corrupt");
+	if (recover & 1)
+		panic ("Unable to continue");
+
+	printk(KERN_EMERG "Attempting to continue.\n");
+	/* 
+	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+	 * recoverable/continuable.This will allow BIOS to look at the MSRs
+	 * for errors if the OS could not log the error.
+	 */
+	for (i=0; i<nr_mce_banks; i++) {
+		u32 msr;
+		msr = MSR_IA32_MC0_STATUS+i*4;
+		rdmsr (msr, low, high);
+		if (high&(1<<31)) {
+			/* Clear it */
+			wrmsr(msr, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+	
+	machine_check_vector = intel_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	for (i=0; i<nr_mce_banks; i++) {
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	}
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+
+	/* Check for P4/Xeon extended MCE MSRs */
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<9))	{/* MCG_EXT_P */
+		mce_num_extended_msrs = (l >> 16) & 0xff;
+		printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
+				" available\n",
+			smp_processor_id(), mce_num_extended_msrs);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+		/* Check for P4/Xeon Thermal monitor */
+		intel_init_thermal(c);
+#endif
+	}
+}
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
new file mode 100644
index 00000000000..c45a1b485c8
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -0,0 +1,54 @@
+/*
+ * P5 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for Pentium class Intel */
+static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
+{
+	u32 loaddr, hi, lotype;
+	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
+	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
+	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
+	if(lotype&(1<<5))
+		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	
+	/*Check for MCE support */
+	if( !cpu_has(c, X86_FEATURE_MCE) )
+		return;	
+
+	/* Default P5 to off as its often misconnected */
+	if(mce_disabled != -1)
+		return;
+	machine_check_vector = pentium_machine_check;
+	wmb();
+
+	/* Read registers before enabling */
+	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
+	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
+	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+
+ 	/* Enable MCE */
+	set_in_cr4(X86_CR4_MCE);
+	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+}
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
new file mode 100644
index 00000000000..46640f8c249
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -0,0 +1,115 @@
+/*
+ * P6 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For PII/PIII */
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	for (i=0; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high & (1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+		}
+	}
+
+	if (recover & 2)
+		panic ("CPU context corrupt");
+	if (recover & 1)
+		panic ("Unable to continue");
+
+	printk (KERN_EMERG "Attempting to continue.\n");
+	/* 
+	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+	 * recoverable/continuable.This will allow BIOS to look at the MSRs
+	 * for errors if the OS could not log the error.
+	 */
+	for (i=0; i<nr_mce_banks; i++) {
+		unsigned int msr;
+		msr = MSR_IA32_MC0_STATUS+i*4;
+		rdmsr (msr,low, high);
+		if (high & (1<<31)) {
+			/* Clear it */
+			wrmsr (msr, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+	
+	/* Check for MCE support */
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return;
+
+	/* Check for PPro style MCA */
+ 	if (!cpu_has(c, X86_FEATURE_MCA))
+		return;
+
+	/* Ok machine check is available */
+	machine_check_vector = intel_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	/* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */
+	for (i=1; i<nr_mce_banks; i++) {
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	}
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+}
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
new file mode 100644
index 00000000000..753fa7acb98
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -0,0 +1,37 @@
+/*
+ * IDT Winchip specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for WinChip C6 */
+static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
+{
+	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting on the Winchip C6 series */
+void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 lo, hi;
+	machine_check_vector = winchip_machine_check;
+	wmb();
+	rdmsr(MSR_IDT_FCR1, lo, hi);
+	lo|= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
+	lo&= ~(1<<4);	/* Enable MCE */
+	wrmsr(MSR_IDT_FCR1, lo, hi);
+	set_in_cr4(X86_CR4_MCE);
+	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+}
diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile
new file mode 100644
index 00000000000..a25b701ab84
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/Makefile
@@ -0,0 +1,5 @@
+obj-y		:= main.o if.o generic.o state.o
+obj-y		+= amd.o
+obj-y		+= cyrix.o
+obj-y		+= centaur.o
+
diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c
new file mode 100644
index 00000000000..1a1e04b6fd0
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/amd.c
@@ -0,0 +1,121 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+
+#include "mtrr.h"
+
+static void
+amd_get_mtrr(unsigned int reg, unsigned long *base,
+	     unsigned int *size, mtrr_type * type)
+{
+	unsigned long low, high;
+
+	rdmsr(MSR_K6_UWCCR, low, high);
+	/*  Upper dword is region 1, lower is region 0  */
+	if (reg == 1)
+		low = high;
+	/*  The base masks off on the right alignment  */
+	*base = (low & 0xFFFE0000) >> PAGE_SHIFT;
+	*type = 0;
+	if (low & 1)
+		*type = MTRR_TYPE_UNCACHABLE;
+	if (low & 2)
+		*type = MTRR_TYPE_WRCOMB;
+	if (!(low & 3)) {
+		*size = 0;
+		return;
+	}
+	/*
+	 *  This needs a little explaining. The size is stored as an
+	 *  inverted mask of bits of 128K granularity 15 bits long offset
+	 *  2 bits
+	 *
+	 *  So to get a size we do invert the mask and add 1 to the lowest
+	 *  mask bit (4 as its 2 bits in). This gives us a size we then shift
+	 *  to turn into 128K blocks
+	 *
+	 *  eg              111 1111 1111 1100      is 512K
+	 *
+	 *  invert          000 0000 0000 0011
+	 *  +1              000 0000 0000 0100
+	 *  *128K   ...
+	 */
+	low = (~low) & 0x1FFFC;
+	*size = (low + 4) << (15 - PAGE_SHIFT);
+	return;
+}
+
+static void amd_set_mtrr(unsigned int reg, unsigned long base,
+			 unsigned long size, mtrr_type type)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+	u32 regs[2];
+
+	/*
+	 *  Low is MTRR0 , High MTRR 1
+	 */
+	rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+	/*
+	 *  Blank to disable
+	 */
+	if (size == 0)
+		regs[reg] = 0;
+	else
+		/* Set the register to the base, the type (off by one) and an
+		   inverted bitmask of the size The size is the only odd
+		   bit. We are fed say 512K We invert this and we get 111 1111
+		   1111 1011 but if you subtract one and invert you get the   
+		   desired 111 1111 1111 1100 mask
+
+		   But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!  */
+		regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
+		    | (base << PAGE_SHIFT) | (type + 1);
+
+	/*
+	 *  The writeback rule is quite specific. See the manual. Its
+	 *  disable local interrupts, write back the cache, set the mtrr
+	 */
+	wbinvd();
+	wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+}
+
+static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+{
+	/* Apply the K6 block alignment and size rules
+	   In order
+	   o Uncached or gathering only
+	   o 128K or bigger block
+	   o Power of 2 block
+	   o base suitably aligned to the power
+	*/
+	if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
+	    || (size & ~(size - 1)) - size || (base & (size - 1)))
+		return -EINVAL;
+	return 0;
+}
+
+static struct mtrr_ops amd_mtrr_ops = {
+	.vendor            = X86_VENDOR_AMD,
+	.set               = amd_set_mtrr,
+	.get               = amd_get_mtrr,
+	.get_free_region   = generic_get_free_region,
+	.validate_add_page = amd_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init amd_init_mtrr(void)
+{
+	set_mtrr_ops(&amd_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(amd_mtrr_init);
diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c
new file mode 100644
index 00000000000..33f00ac314e
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/centaur.c
@@ -0,0 +1,223 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+static struct {
+	unsigned long high;
+	unsigned long low;
+} centaur_mcr[8];
+
+static u8 centaur_mcr_reserved;
+static u8 centaur_mcr_type;	/* 0 for winchip, 1 for winchip2 */
+
+/*
+ *	Report boot time MCR setups 
+ */
+
+static int
+centaur_get_free_region(unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase;
+	unsigned int lsize;
+
+	max = num_var_ranges;
+	for (i = 0; i < max; ++i) {
+		if (centaur_mcr_reserved & (1 << i))
+			continue;
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return i;
+	}
+	return -ENOSPC;
+}
+
+void
+mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+{
+	centaur_mcr[mcr].low = lo;
+	centaur_mcr[mcr].high = hi;
+}
+
+static void
+centaur_get_mcr(unsigned int reg, unsigned long *base,
+		unsigned int *size, mtrr_type * type)
+{
+	*base = centaur_mcr[reg].high >> PAGE_SHIFT;
+	*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
+	*type = MTRR_TYPE_WRCOMB;	/*  If it is there, it is write-combining  */
+	if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
+		*type = MTRR_TYPE_UNCACHABLE;
+	if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
+		*type = MTRR_TYPE_WRBACK;
+	if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
+		*type = MTRR_TYPE_WRBACK;
+
+}
+
+static void centaur_set_mcr(unsigned int reg, unsigned long base,
+			    unsigned long size, mtrr_type type)
+{
+	unsigned long low, high;
+
+	if (size == 0) {
+		/*  Disable  */
+		high = low = 0;
+	} else {
+		high = base << PAGE_SHIFT;
+		if (centaur_mcr_type == 0)
+			low = -size << PAGE_SHIFT | 0x1f;	/* only support write-combining... */
+		else {
+			if (type == MTRR_TYPE_UNCACHABLE)
+				low = -size << PAGE_SHIFT | 0x02;	/* NC */
+			else
+				low = -size << PAGE_SHIFT | 0x09;	/* WWO,WC */
+		}
+	}
+	centaur_mcr[reg].high = high;
+	centaur_mcr[reg].low = low;
+	wrmsr(MSR_IDT_MCR0 + reg, low, high);
+}
+
+#if 0
+/*
+ *	Initialise the later (saner) Winchip MCR variant. In this version
+ *	the BIOS can pass us the registers it has used (but not their values)
+ *	and the control register is read/write
+ */
+
+static void __init
+centaur_mcr1_init(void)
+{
+	unsigned i;
+	u32 lo, hi;
+
+	/* Unfortunately, MCR's are read-only, so there is no way to
+	 * find out what the bios might have done.
+	 */
+
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	if (((lo >> 17) & 7) == 1) {	/* Type 1 Winchip2 MCR */
+		lo &= ~0x1C0;	/* clear key */
+		lo |= 0x040;	/* set key to 1 */
+		wrmsr(MSR_IDT_MCR_CTRL, lo, hi);	/* unlock MCR */
+	}
+
+	centaur_mcr_type = 1;
+
+	/*
+	 *  Clear any unconfigured MCR's.
+	 */
+
+	for (i = 0; i < 8; ++i) {
+		if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
+			if (!(lo & (1 << (9 + i))))
+				wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+			else
+				/*
+				 *      If the BIOS set up an MCR we cannot see it
+				 *      but we don't wish to obliterate it
+				 */
+				centaur_mcr_reserved |= (1 << i);
+		}
+	}
+	/*  
+	 *  Throw the main write-combining switch... 
+	 *  However if OOSTORE is enabled then people have already done far
+	 *  cleverer things and we should behave. 
+	 */
+
+	lo |= 15;		/* Write combine enables */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+/*
+ *	Initialise the original winchip with read only MCR registers
+ *	no used bitmask for the BIOS to pass on and write only control
+ */
+
+static void __init
+centaur_mcr0_init(void)
+{
+	unsigned i;
+
+	/* Unfortunately, MCR's are read-only, so there is no way to
+	 * find out what the bios might have done.
+	 */
+
+	/* Clear any unconfigured MCR's.
+	 * This way we are sure that the centaur_mcr array contains the actual
+	 * values. The disadvantage is that any BIOS tweaks are thus undone.
+	 *
+	 */
+	for (i = 0; i < 8; ++i) {
+		if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
+			wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+	}
+
+	wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);	/* Write only */
+}
+
+/*
+ *	Initialise Winchip series MCR registers
+ */
+
+static void __init
+centaur_mcr_init(void)
+{
+	struct set_mtrr_context ctxt;
+
+	set_mtrr_prepare_save(&ctxt);
+	set_mtrr_cache_disable(&ctxt);
+
+	if (boot_cpu_data.x86_model == 4)
+		centaur_mcr0_init();
+	else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
+		centaur_mcr1_init();
+
+	set_mtrr_done(&ctxt);
+}
+#endif
+
+static int centaur_validate_add_page(unsigned long base, 
+				     unsigned long size, unsigned int type)
+{
+	/*
+	 *  FIXME: Winchip2 supports uncached
+	 */
+	if (type != MTRR_TYPE_WRCOMB && 
+	    (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
+		printk(KERN_WARNING
+		       "mtrr: only write-combining%s supported\n",
+		       centaur_mcr_type ? " and uncacheable are"
+		       : " is");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct mtrr_ops centaur_mtrr_ops = {
+	.vendor            = X86_VENDOR_CENTAUR,
+//	.init              = centaur_mcr_init,
+	.set               = centaur_set_mcr,
+	.get               = centaur_get_mcr,
+	.get_free_region   = centaur_get_free_region,
+	.validate_add_page = centaur_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init centaur_init_mtrr(void)
+{
+	set_mtrr_ops(&centaur_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(centaur_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog
new file mode 100644
index 00000000000..af136853595
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/changelog
@@ -0,0 +1,229 @@
+    ChangeLog
+
+    Prehistory Martin Tischh�user <martin@ikcbarka.fzk.de>
+	       Initial register-setting code (from proform-1.0).
+    19971216   Richard Gooch <rgooch@atnf.csiro.au>
+               Original version for /proc/mtrr interface, SMP-safe.
+  v1.0
+    19971217   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix for ioctls()'s.
+	       Added sample code in Documentation/mtrr.txt
+  v1.1
+    19971218   Richard Gooch <rgooch@atnf.csiro.au>
+               Disallow overlapping regions.
+    19971219   Jens Maurer <jmaurer@menuett.rhein-main.de>
+               Register-setting fixups.
+  v1.2
+    19971222   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.75.
+  v1.3
+    19971229   David Wragg <dpw@doc.ic.ac.uk>
+               Register-setting fixups and conformity with Intel conventions.
+    19971229   Richard Gooch <rgooch@atnf.csiro.au>
+               Cosmetic changes and wrote this ChangeLog ;-)
+    19980106   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.78.
+  v1.4
+    19980119   David Wragg <dpw@doc.ic.ac.uk>
+               Included passive-release enable code (elsewhere in PCI setup).
+  v1.5
+    19980131   Richard Gooch <rgooch@atnf.csiro.au>
+               Replaced global kernel lock with private spinlock.
+  v1.6
+    19980201   Richard Gooch <rgooch@atnf.csiro.au>
+               Added wait for other CPUs to complete changes.
+  v1.7
+    19980202   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix in definition of <set_mtrr> for UP.
+  v1.8
+    19980319   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.90.
+    19980323   Richard Gooch <rgooch@atnf.csiro.au>
+               Move SMP BIOS fixup before secondary CPUs call <calibrate_delay>
+  v1.9
+    19980325   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixed test for overlapping regions: confused by adjacent regions
+    19980326   Richard Gooch <rgooch@atnf.csiro.au>
+               Added wbinvd in <set_mtrr_prepare>.
+    19980401   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix for non-SMP compilation.
+    19980418   David Wragg <dpw@doc.ic.ac.uk>
+               Fixed-MTRR synchronisation for SMP and use atomic operations
+	       instead of spinlocks.
+    19980418   Richard Gooch <rgooch@atnf.csiro.au>
+	       Differentiate different MTRR register classes for BIOS fixup.
+  v1.10
+    19980419   David Wragg <dpw@doc.ic.ac.uk>
+	       Bug fix in variable MTRR synchronisation.
+  v1.11
+    19980419   Richard Gooch <rgooch@atnf.csiro.au>
+	       Fixups for kernel 2.1.97.
+  v1.12
+    19980421   Richard Gooch <rgooch@atnf.csiro.au>
+	       Safer synchronisation across CPUs when changing MTRRs.
+  v1.13
+    19980423   Richard Gooch <rgooch@atnf.csiro.au>
+	       Bugfix for SMP systems without MTRR support.
+  v1.14
+    19980427   Richard Gooch <rgooch@atnf.csiro.au>
+	       Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines.
+  v1.15
+    19980427   Richard Gooch <rgooch@atnf.csiro.au>
+	       Use atomic bitops for setting SMP change mask.
+  v1.16
+    19980428   Richard Gooch <rgooch@atnf.csiro.au>
+	       Removed spurious diagnostic message.
+  v1.17
+    19980429   Richard Gooch <rgooch@atnf.csiro.au>
+	       Moved register-setting macros into this file.
+	       Moved setup code from init/main.c to i386-specific areas.
+  v1.18
+    19980502   Richard Gooch <rgooch@atnf.csiro.au>
+	       Moved MTRR detection outside conditionals in <mtrr_init>.
+  v1.19
+    19980502   Richard Gooch <rgooch@atnf.csiro.au>
+	       Documentation improvement: mention Pentium II and AGP.
+  v1.20
+    19980521   Richard Gooch <rgooch@atnf.csiro.au>
+	       Only manipulate interrupt enable flag on local CPU.
+	       Allow enclosed uncachable regions.
+  v1.21
+    19980611   Richard Gooch <rgooch@atnf.csiro.au>
+	       Always define <main_lock>.
+  v1.22
+    19980901   Richard Gooch <rgooch@atnf.csiro.au>
+	       Removed module support in order to tidy up code.
+	       Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>.
+	       Created addition queue for prior to SMP commence.
+  v1.23
+    19980902   Richard Gooch <rgooch@atnf.csiro.au>
+	       Ported patch to kernel 2.1.120-pre3.
+  v1.24
+    19980910   Richard Gooch <rgooch@atnf.csiro.au>
+	       Removed sanity checks and addition queue: Linus prefers an OOPS.
+  v1.25
+    19981001   Richard Gooch <rgooch@atnf.csiro.au>
+	       Fixed harmless compiler warning in include/asm-i386/mtrr.h
+	       Fixed version numbering and history for v1.23 -> v1.24.
+  v1.26
+    19990118   Richard Gooch <rgooch@atnf.csiro.au>
+	       Added devfs support.
+  v1.27
+    19990123   Richard Gooch <rgooch@atnf.csiro.au>
+	       Changed locking to spin with reschedule.
+	       Made use of new <smp_call_function>.
+  v1.28
+    19990201   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+	       Extended the driver to be able to use Cyrix style ARRs.
+    19990204   Richard Gooch <rgooch@atnf.csiro.au>
+	       Restructured Cyrix support.
+  v1.29
+    19990204   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+	       Refined ARR support: enable MAPEN in set_mtrr_prepare()
+	       and disable MAPEN in set_mtrr_done().
+    19990205   Richard Gooch <rgooch@atnf.csiro.au>
+	       Minor cleanups.
+  v1.30
+    19990208   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Protect plain 6x86s (and other processors without the
+               Page Global Enable feature) against accessing CR4 in
+               set_mtrr_prepare() and set_mtrr_done().
+    19990210   Richard Gooch <rgooch@atnf.csiro.au>
+	       Turned <set_mtrr_up> and <get_mtrr> into function pointers.
+  v1.31
+    19990212   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Major rewrite of cyrix_arr_init(): do not touch ARRs,
+               leave them as the BIOS have set them up.
+               Enable usage of all 8 ARRs.
+               Avoid multiplications by 3 everywhere and other
+               code clean ups/speed ups.
+    19990213   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Set up other Cyrix processors identical to the boot cpu.
+               Since Cyrix don't support Intel APIC, this is l'art pour l'art.
+               Weigh ARRs by size:
+               If size <= 32M is given, set up ARR# we were given.
+               If size >  32M is given, set up ARR7 only if it is free,
+               fail otherwise.
+    19990214   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Also check for size >= 256K if we are to set up ARR7,
+               mtrr_add() returns the value it gets from set_mtrr()
+    19990218   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Remove Cyrix "coma bug" workaround from here.
+               Moved to linux/arch/i386/kernel/setup.c and
+               linux/include/asm-i386/bugs.h
+    19990228   Richard Gooch <rgooch@atnf.csiro.au>
+	       Added MTRRIOC_KILL_ENTRY ioctl(2)
+	       Trap for counter underflow in <mtrr_file_del>.
+	       Trap for 4 MiB aligned regions for PPro, stepping <= 7.
+    19990301   Richard Gooch <rgooch@atnf.csiro.au>
+	       Created <get_free_region> hook.
+    19990305   Richard Gooch <rgooch@atnf.csiro.au>
+	       Temporarily disable AMD support now MTRR capability flag is set.
+  v1.32
+    19990308   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+	       Adjust my changes (19990212-19990218) to Richard Gooch's
+	       latest changes. (19990228-19990305)
+  v1.33
+    19990309   Richard Gooch <rgooch@atnf.csiro.au>
+	       Fixed typo in <printk> message.
+    19990310   Richard Gooch <rgooch@atnf.csiro.au>
+	       Support K6-II/III based on Alan Cox's <alan@redhat.com> patches.
+  v1.34
+    19990511   Bart Hartgers <bart@etpmod.phys.tue.nl>
+	       Support Centaur C6 MCR's.
+    19990512   Richard Gooch <rgooch@atnf.csiro.au>
+	       Minor cleanups.
+  v1.35
+    19990707   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Check whether ARR3 is protected in cyrix_get_free_region()
+               and mtrr_del(). The code won't attempt to delete or change it
+               from now on if the BIOS protected ARR3. It silently skips ARR3
+               in cyrix_get_free_region() or returns with an error code from
+               mtrr_del().
+    19990711   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Reset some bits in the CCRs in cyrix_arr_init() to disable SMM
+               if ARR3 isn't protected. This is needed because if SMM is active
+               and ARR3 isn't protected then deleting and setting ARR3 again
+               may lock up the processor. With SMM entirely disabled, it does
+               not happen.
+    19990812   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+               Rearrange switch() statements so the driver accomodates to
+               the fact that the AMD Athlon handles its MTRRs the same way
+               as Intel does.
+    19990814   Zolt�n B�sz�rm�nyi <zboszor@mail.externet.hu>
+	       Double check for Intel in mtrr_add()'s big switch() because
+	       that revision check is only valid for Intel CPUs.
+    19990819   Alan Cox <alan@redhat.com>
+               Tested Zoltan's changes on a pre production Athlon - 100%
+               success.
+    19991008   Manfred Spraul <manfreds@colorfullife.com>
+    	       replaced spin_lock_reschedule() with a normal semaphore.
+  v1.36
+    20000221   Richard Gooch <rgooch@atnf.csiro.au>
+               Compile fix if procfs and devfs not enabled.
+	       Formatting changes.
+  v1.37
+    20001109   H. Peter Anvin <hpa@zytor.com>
+	       Use the new centralized CPU feature detects.
+
+  v1.38
+    20010309   Dave Jones <davej@suse.de>
+	       Add support for Cyrix III.
+
+  v1.39
+    20010312   Dave Jones <davej@suse.de>
+               Ugh, I broke AMD support.
+	       Reworked fix by Troels Walsted Hansen <troels@thule.no>
+
+  v1.40
+    20010327   Dave Jones <davej@suse.de>
+	       Adapted Cyrix III support to include VIA C3.
+
+  v2.0
+    20020306   Patrick Mochel <mochel@osdl.org>
+               Split mtrr.c -> mtrr/*.c
+               Converted to Linux Kernel Coding Style
+               Fixed several minor nits in form
+               Moved some SMP-only functions out, so they can be used
+                for power management in the future.
+               TODO: Fix user interface cruft.
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
new file mode 100644
index 00000000000..933b0dd62f4
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -0,0 +1,364 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include "mtrr.h"
+
+int arr3_protected;
+
+static void
+cyrix_get_arr(unsigned int reg, unsigned long *base,
+	      unsigned int *size, mtrr_type * type)
+{
+	unsigned long flags;
+	unsigned char arr, ccr3, rcr, shift;
+
+	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
+
+	/* Save flags and disable interrupts */
+	local_irq_save(flags);
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
+	((unsigned char *) base)[3] = getCx86(arr);
+	((unsigned char *) base)[2] = getCx86(arr + 1);
+	((unsigned char *) base)[1] = getCx86(arr + 2);
+	rcr = getCx86(CX86_RCR_BASE + reg);
+	setCx86(CX86_CCR3, ccr3);	/* disable MAPEN */
+
+	/* Enable interrupts if it was enabled previously */
+	local_irq_restore(flags);
+	shift = ((unsigned char *) base)[1] & 0x0f;
+	*base >>= PAGE_SHIFT;
+
+	/* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
+	 * Note: shift==0xf means 4G, this is unsupported.
+	 */
+	if (shift)
+		*size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
+	else
+		*size = 0;
+
+	/* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
+	if (reg < 7) {
+		switch (rcr) {
+		case 1:
+			*type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 8:
+			*type = MTRR_TYPE_WRBACK;
+			break;
+		case 9:
+			*type = MTRR_TYPE_WRCOMB;
+			break;
+		case 24:
+		default:
+			*type = MTRR_TYPE_WRTHROUGH;
+			break;
+		}
+	} else {
+		switch (rcr) {
+		case 0:
+			*type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 8:
+			*type = MTRR_TYPE_WRCOMB;
+			break;
+		case 9:
+			*type = MTRR_TYPE_WRBACK;
+			break;
+		case 25:
+		default:
+			*type = MTRR_TYPE_WRTHROUGH;
+			break;
+		}
+	}
+}
+
+static int
+cyrix_get_free_region(unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free ARR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i;
+	mtrr_type ltype;
+	unsigned long lbase;
+	unsigned int  lsize;
+
+	/* If we are to set up a region >32M then look at ARR7 immediately */
+	if (size > 0x2000) {
+		cyrix_get_arr(7, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return 7;
+		/*  Else try ARR0-ARR6 first  */
+	} else {
+		for (i = 0; i < 7; i++) {
+			cyrix_get_arr(i, &lbase, &lsize, &ltype);
+			if ((i == 3) && arr3_protected)
+				continue;
+			if (lsize == 0)
+				return i;
+		}
+		/* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
+		cyrix_get_arr(i, &lbase, &lsize, &ltype);
+		if ((lsize == 0) && (size >= 0x40))
+			return i;
+	}
+	return -ENOSPC;
+}
+
+static u32 cr4 = 0;
+static u32 ccr3;
+
+static void prepare_set(void)
+{
+	u32 cr0;
+
+	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+	if ( cpu_has_pge ) {
+		cr4 = read_cr4();
+		write_cr4(cr4 & (unsigned char) ~(1 << 7));
+	}
+
+	/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
+	    a side-effect  */
+	cr0 = read_cr0() | 0x40000000;
+	wbinvd();
+	write_cr0(cr0);
+	wbinvd();
+
+	/* Cyrix ARRs - everything else were excluded at the top */
+	ccr3 = getCx86(CX86_CCR3);
+
+	/* Cyrix ARRs - everything else were excluded at the top */
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
+
+}
+
+static void post_set(void)
+{
+	/*  Flush caches and TLBs  */
+	wbinvd();
+
+	/* Cyrix ARRs - everything else was excluded at the top */
+	setCx86(CX86_CCR3, ccr3);
+		
+	/*  Enable caches  */
+	write_cr0(read_cr0() & 0xbfffffff);
+
+	/*  Restore value of CR4  */
+	if ( cpu_has_pge )
+		write_cr4(cr4);
+}
+
+static void cyrix_set_arr(unsigned int reg, unsigned long base,
+			  unsigned long size, mtrr_type type)
+{
+	unsigned char arr, arr_type, arr_size;
+
+	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
+
+	/* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
+	if (reg >= 7)
+		size >>= 6;
+
+	size &= 0x7fff;		/* make sure arr_size <= 14 */
+	for (arr_size = 0; size; arr_size++, size >>= 1) ;
+
+	if (reg < 7) {
+		switch (type) {
+		case MTRR_TYPE_UNCACHABLE:
+			arr_type = 1;
+			break;
+		case MTRR_TYPE_WRCOMB:
+			arr_type = 9;
+			break;
+		case MTRR_TYPE_WRTHROUGH:
+			arr_type = 24;
+			break;
+		default:
+			arr_type = 8;
+			break;
+		}
+	} else {
+		switch (type) {
+		case MTRR_TYPE_UNCACHABLE:
+			arr_type = 0;
+			break;
+		case MTRR_TYPE_WRCOMB:
+			arr_type = 8;
+			break;
+		case MTRR_TYPE_WRTHROUGH:
+			arr_type = 25;
+			break;
+		default:
+			arr_type = 9;
+			break;
+		}
+	}
+
+	prepare_set();
+
+	base <<= PAGE_SHIFT;
+	setCx86(arr, ((unsigned char *) &base)[3]);
+	setCx86(arr + 1, ((unsigned char *) &base)[2]);
+	setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
+	setCx86(CX86_RCR_BASE + reg, arr_type);
+
+	post_set();
+}
+
+typedef struct {
+	unsigned long base;
+	unsigned int size;
+	mtrr_type type;
+} arr_state_t;
+
+static arr_state_t arr_state[8] __initdata = {
+	{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
+	{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
+};
+
+static unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 };
+
+static void cyrix_set_all(void)
+{
+	int i;
+
+	prepare_set();
+
+	/* the CCRs are not contiguous */
+	for (i = 0; i < 4; i++)
+		setCx86(CX86_CCR0 + i, ccr_state[i]);
+	for (; i < 7; i++)
+		setCx86(CX86_CCR4 + i, ccr_state[i]);
+	for (i = 0; i < 8; i++)
+		cyrix_set_arr(i, arr_state[i].base, 
+			      arr_state[i].size, arr_state[i].type);
+
+	post_set();
+}
+
+#if 0
+/*
+ * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
+ * with the SMM (System Management Mode) mode. So we need the following:
+ * Check whether SMI_LOCK (CCR3 bit 0) is set
+ *   if it is set, write a warning message: ARR3 cannot be changed!
+ *     (it cannot be changed until the next processor reset)
+ *   if it is reset, then we can change it, set all the needed bits:
+ *   - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
+ *   - disable access to SMM memory (CCR1 bit 2 reset)
+ *   - disable SMM mode (CCR1 bit 1 reset)
+ *   - disable write protection of ARR3 (CCR6 bit 1 reset)
+ *   - (maybe) disable ARR3
+ * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
+ */
+static void __init
+cyrix_arr_init(void)
+{
+	struct set_mtrr_context ctxt;
+	unsigned char ccr[7];
+	int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
+#ifdef CONFIG_SMP
+	int i;
+#endif
+
+	/* flush cache and enable MAPEN */
+	set_mtrr_prepare_save(&ctxt);
+	set_mtrr_cache_disable(&ctxt);
+
+	/* Save all CCRs locally */
+	ccr[0] = getCx86(CX86_CCR0);
+	ccr[1] = getCx86(CX86_CCR1);
+	ccr[2] = getCx86(CX86_CCR2);
+	ccr[3] = ctxt.ccr3;
+	ccr[4] = getCx86(CX86_CCR4);
+	ccr[5] = getCx86(CX86_CCR5);
+	ccr[6] = getCx86(CX86_CCR6);
+
+	if (ccr[3] & 1) {
+		ccrc[3] = 1;
+		arr3_protected = 1;
+	} else {
+		/* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
+		 * access to SMM memory through ARR3 (bit 7).
+		 */
+		if (ccr[1] & 0x80) {
+			ccr[1] &= 0x7f;
+			ccrc[1] |= 0x80;
+		}
+		if (ccr[1] & 0x04) {
+			ccr[1] &= 0xfb;
+			ccrc[1] |= 0x04;
+		}
+		if (ccr[1] & 0x02) {
+			ccr[1] &= 0xfd;
+			ccrc[1] |= 0x02;
+		}
+		arr3_protected = 0;
+		if (ccr[6] & 0x02) {
+			ccr[6] &= 0xfd;
+			ccrc[6] = 1;	/* Disable write protection of ARR3 */
+			setCx86(CX86_CCR6, ccr[6]);
+		}
+		/* Disable ARR3. This is safe now that we disabled SMM. */
+		/* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
+	}
+	/* If we changed CCR1 in memory, change it in the processor, too. */
+	if (ccrc[1])
+		setCx86(CX86_CCR1, ccr[1]);
+
+	/* Enable ARR usage by the processor */
+	if (!(ccr[5] & 0x20)) {
+		ccr[5] |= 0x20;
+		ccrc[5] = 1;
+		setCx86(CX86_CCR5, ccr[5]);
+	}
+#ifdef CONFIG_SMP
+	for (i = 0; i < 7; i++)
+		ccr_state[i] = ccr[i];
+	for (i = 0; i < 8; i++)
+		cyrix_get_arr(i,
+			      &arr_state[i].base, &arr_state[i].size,
+			      &arr_state[i].type);
+#endif
+
+	set_mtrr_done(&ctxt);	/* flush cache and disable MAPEN */
+
+	if (ccrc[5])
+		printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
+	if (ccrc[3])
+		printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
+/*
+    if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
+    if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
+    if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
+*/
+	if (ccrc[6])
+		printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
+}
+#endif
+
+static struct mtrr_ops cyrix_mtrr_ops = {
+	.vendor            = X86_VENDOR_CYRIX,
+//	.init              = cyrix_arr_init,
+	.set_all	   = cyrix_set_all,
+	.set               = cyrix_set_arr,
+	.get               = cyrix_get_arr,
+	.get_free_region   = cyrix_get_free_region,
+	.validate_add_page = generic_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init cyrix_init_mtrr(void)
+{
+	set_mtrr_ops(&cyrix_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
new file mode 100644
index 00000000000..a4cce454d09
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -0,0 +1,417 @@
+/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
+   because MTRRs can span upto 40 bits (36bits on most modern x86) */ 
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <asm/cpufeature.h>
+#include <asm/tlbflush.h>
+#include "mtrr.h"
+
+struct mtrr_state {
+	struct mtrr_var_range *var_ranges;
+	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+	unsigned char enabled;
+	mtrr_type def_type;
+};
+
+static unsigned long smp_changes_mask;
+static struct mtrr_state mtrr_state = {};
+
+/*  Get the MSR pair relating to a var range  */
+static void __init
+get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
+{
+	rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+}
+
+static void __init
+get_fixed_ranges(mtrr_type * frs)
+{
+	unsigned int *p = (unsigned int *) frs;
+	int i;
+
+	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+
+	for (i = 0; i < 2; i++)
+		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+	for (i = 0; i < 8; i++)
+		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+}
+
+/*  Grab all of the MTRR state for this CPU into *state  */
+void __init get_mtrr_state(void)
+{
+	unsigned int i;
+	struct mtrr_var_range *vrs;
+	unsigned lo, dummy;
+
+	if (!mtrr_state.var_ranges) {
+		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
+						GFP_KERNEL);
+		if (!mtrr_state.var_ranges)
+			return;
+	} 
+	vrs = mtrr_state.var_ranges;
+
+	for (i = 0; i < num_var_ranges; i++)
+		get_mtrr_var_range(i, &vrs[i]);
+	get_fixed_ranges(mtrr_state.fixed_ranges);
+
+	rdmsr(MTRRdefType_MSR, lo, dummy);
+	mtrr_state.def_type = (lo & 0xff);
+	mtrr_state.enabled = (lo & 0xc00) >> 10;
+}
+
+/*  Free resources associated with a struct mtrr_state  */
+void __init finalize_mtrr_state(void)
+{
+	if (mtrr_state.var_ranges)
+		kfree(mtrr_state.var_ranges);
+	mtrr_state.var_ranges = NULL;
+}
+
+/*  Some BIOS's are fucked and don't set all MTRRs the same!  */
+void __init mtrr_state_warn(void)
+{
+	unsigned long mask = smp_changes_mask;
+
+	if (!mask)
+		return;
+	if (mask & MTRR_CHANGE_MASK_FIXED)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+	if (mask & MTRR_CHANGE_MASK_VARIABLE)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
+	if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+	printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
+	printk(KERN_INFO "mtrr: corrected configuration.\n");
+}
+
+/* Doesn't attempt to pass an error out to MTRR users
+   because it's quite complicated in some cases and probably not
+   worth it because the best error handling is to ignore it. */
+void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
+{
+	if (wrmsr_safe(msr, a, b) < 0)
+		printk(KERN_ERR
+			"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+			smp_processor_id(), msr, a, b);
+}
+
+int generic_get_free_region(unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase;
+	unsigned lsize;
+
+	max = num_var_ranges;
+	for (i = 0; i < max; ++i) {
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return i;
+	}
+	return -ENOSPC;
+}
+
+void generic_get_mtrr(unsigned int reg, unsigned long *base,
+		      unsigned int *size, mtrr_type * type)
+{
+	unsigned int mask_lo, mask_hi, base_lo, base_hi;
+
+	rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
+	if ((mask_lo & 0x800) == 0) {
+		/*  Invalid (i.e. free) range  */
+		*base = 0;
+		*size = 0;
+		*type = 0;
+		return;
+	}
+
+	rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
+
+	/* Work out the shifted address mask. */
+	mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
+	    | mask_lo >> PAGE_SHIFT;
+
+	/* This works correctly if size is a power of two, i.e. a
+	   contiguous range. */
+	*size = -mask_lo;
+	*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+	*type = base_lo & 0xff;
+}
+
+static int set_fixed_ranges(mtrr_type * frs)
+{
+	unsigned int *p = (unsigned int *) frs;
+	int changed = FALSE;
+	int i;
+	unsigned int lo, hi;
+
+	rdmsr(MTRRfix64K_00000_MSR, lo, hi);
+	if (p[0] != lo || p[1] != hi) {
+		mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+		changed = TRUE;
+	}
+
+	for (i = 0; i < 2; i++) {
+		rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
+		if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
+			mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
+			      p[3 + i * 2]);
+			changed = TRUE;
+		}
+	}
+
+	for (i = 0; i < 8; i++) {
+		rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
+		if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
+			mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
+			      p[7 + i * 2]);
+			changed = TRUE;
+		}
+	}
+	return changed;
+}
+
+/*  Set the MSR pair relating to a var range. Returns TRUE if
+    changes are made  */
+static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+{
+	unsigned int lo, hi;
+	int changed = FALSE;
+
+	rdmsr(MTRRphysBase_MSR(index), lo, hi);
+	if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
+	    || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) {
+		mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+		changed = TRUE;
+	}
+
+	rdmsr(MTRRphysMask_MSR(index), lo, hi);
+
+	if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
+	    || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) {
+		mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+		changed = TRUE;
+	}
+	return changed;
+}
+
+static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
+/*  [SUMMARY] Set the MTRR state for this CPU.
+    <state> The MTRR state information to read.
+    <ctxt> Some relevant CPU context.
+    [NOTE] The CPU must already be in a safe state for MTRR changes.
+    [RETURNS] 0 if no changes made, else a mask indication what was changed.
+*/
+{
+	unsigned int i;
+	unsigned long change_mask = 0;
+
+	for (i = 0; i < num_var_ranges; i++)
+		if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
+			change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+	if (set_fixed_ranges(mtrr_state.fixed_ranges))
+		change_mask |= MTRR_CHANGE_MASK_FIXED;
+
+	/*  Set_mtrr_restore restores the old value of MTRRdefType,
+	   so to set it we fiddle with the saved value  */
+	if ((deftype_lo & 0xff) != mtrr_state.def_type
+	    || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+		deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
+		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+	}
+
+	return change_mask;
+}
+
+
+static unsigned long cr4 = 0;
+static u32 deftype_lo, deftype_hi;
+static DEFINE_SPINLOCK(set_atomicity_lock);
+
+/*
+ * Since we are disabling the cache don't allow any interrupts - they
+ * would run extremely slow and would only increase the pain.  The caller must
+ * ensure that local interrupts are disabled and are reenabled after post_set()
+ * has been called.
+ */
+
+static void prepare_set(void)
+{
+	unsigned long cr0;
+
+	/*  Note that this is not ideal, since the cache is only flushed/disabled
+	   for this CPU while the MTRRs are changed, but changing this requires
+	   more invasive changes to the way the kernel boots  */
+
+	spin_lock(&set_atomicity_lock);
+
+	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
+	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
+	write_cr0(cr0);
+	wbinvd();
+
+	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+	if ( cpu_has_pge ) {
+		cr4 = read_cr4();
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	}
+
+	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+	__flush_tlb();
+
+	/*  Save MTRR state */
+	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+
+	/*  Disable MTRRs, and set the default type to uncached  */
+	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
+}
+
+static void post_set(void)
+{
+	/*  Flush TLBs (no need to flush caches - they are disabled)  */
+	__flush_tlb();
+
+	/* Intel (P6) standard MTRRs */
+	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+		
+	/*  Enable caches  */
+	write_cr0(read_cr0() & 0xbfffffff);
+
+	/*  Restore value of CR4  */
+	if ( cpu_has_pge )
+		write_cr4(cr4);
+	spin_unlock(&set_atomicity_lock);
+}
+
+static void generic_set_all(void)
+{
+	unsigned long mask, count;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	/* Actually set the state */
+	mask = set_mtrr_state(deftype_lo,deftype_hi);
+
+	post_set();
+	local_irq_restore(flags);
+
+	/*  Use the atomic bitops to update the global mask  */
+	for (count = 0; count < sizeof mask * 8; ++count) {
+		if (mask & 0x01)
+			set_bit(count, &smp_changes_mask);
+		mask >>= 1;
+	}
+	
+}
+
+static void generic_set_mtrr(unsigned int reg, unsigned long base,
+			     unsigned long size, mtrr_type type)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	if (size == 0) {
+		/* The invalid bit is kept in the mask, so we simply clear the
+		   relevant mask register to disable a range. */
+		mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+	} else {
+		mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
+		      (base & size_and_mask) >> (32 - PAGE_SHIFT));
+		mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
+		      (-size & size_and_mask) >> (32 - PAGE_SHIFT));
+	}
+
+	post_set();
+	local_irq_restore(flags);
+}
+
+int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+{
+	unsigned long lbase, last;
+
+	/*  For Intel PPro stepping <= 7, must be 4 MiB aligned 
+	    and not touch 0x70000000->0x7003FFFF */
+	if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
+	    boot_cpu_data.x86_model == 1 &&
+	    boot_cpu_data.x86_mask <= 7) {
+		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
+			printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+			return -EINVAL;
+		}
+		if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
+		    (type == MTRR_TYPE_WRCOMB
+		     || type == MTRR_TYPE_WRBACK)) {
+			printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+			return -EINVAL;
+		}
+	}
+
+	if (base + size < 0x100) {
+		printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
+		       base, size);
+		return -EINVAL;
+	}
+	/*  Check upper bits of base and last are equal and lower bits are 0
+	    for base and 1 for last  */
+	last = base + size - 1;
+	for (lbase = base; !(lbase & 1) && (last & 1);
+	     lbase = lbase >> 1, last = last >> 1) ;
+	if (lbase != last) {
+		printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
+		       base, size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+static int generic_have_wrcomb(void)
+{
+	unsigned long config, dummy;
+	rdmsr(MTRRcap_MSR, config, dummy);
+	return (config & (1 << 10));
+}
+
+int positive_have_wrcomb(void)
+{
+	return 1;
+}
+
+/* generic structure...
+ */
+struct mtrr_ops generic_mtrr_ops = {
+	.use_intel_if      = 1,
+	.set_all	   = generic_set_all,
+	.get               = generic_get_mtrr,
+	.get_free_region   = generic_get_free_region,
+	.set               = generic_set_mtrr,
+	.validate_add_page = generic_validate_add_page,
+	.have_wrcomb       = generic_have_wrcomb,
+};
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
new file mode 100644
index 00000000000..1923e0aed26
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -0,0 +1,374 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+
+#define LINE_SIZE 80
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+/* RED-PEN: this is accessed without any locking */
+extern unsigned int *usage_table;
+
+
+#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
+
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+    "uncachable",               /* 0 */
+    "write-combining",          /* 1 */
+    "?",                        /* 2 */
+    "?",                        /* 3 */
+    "write-through",            /* 4 */
+    "write-protect",            /* 5 */
+    "write-back",               /* 6 */
+};
+
+char *mtrr_attrib_to_str(int x)
+{
+	return (x <= 6) ? mtrr_strings[x] : "?";
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int
+mtrr_file_add(unsigned long base, unsigned long size,
+	      unsigned int type, char increment, struct file *file, int page)
+{
+	int reg, max;
+	unsigned int *fcount = FILE_FCOUNT(file); 
+
+	max = num_var_ranges;
+	if (fcount == NULL) {
+		fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL);
+		if (!fcount)
+			return -ENOMEM;
+		memset(fcount, 0, max * sizeof *fcount);
+		FILE_FCOUNT(file) = fcount;
+	}
+	if (!page) {
+		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
+			return -EINVAL;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+	}
+	reg = mtrr_add_page(base, size, type, 1);
+	if (reg >= 0)
+		++fcount[reg];
+	return reg;
+}
+
+static int
+mtrr_file_del(unsigned long base, unsigned long size,
+	      struct file *file, int page)
+{
+	int reg;
+	unsigned int *fcount = FILE_FCOUNT(file);
+
+	if (!page) {
+		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
+			return -EINVAL;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+	}
+	reg = mtrr_del_page(-1, base, size);
+	if (reg < 0)
+		return reg;
+	if (fcount == NULL)
+		return reg;
+	if (fcount[reg] < 1)
+		return -EINVAL;
+	--fcount[reg];
+	return reg;
+}
+
+/* RED-PEN: seq_file can seek now. this is ignored. */
+static ssize_t
+mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
+/*  Format of control line:
+    "base=%Lx size=%Lx type=%s"     OR:
+    "disable=%d"
+*/
+{
+	int i, err;
+	unsigned long reg;
+	unsigned long long base, size;
+	char *ptr;
+	char line[LINE_SIZE];
+	size_t linelen;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (!len)
+		return -EINVAL;
+	memset(line, 0, LINE_SIZE);
+	if (len > LINE_SIZE)
+		len = LINE_SIZE;
+	if (copy_from_user(line, buf, len - 1))
+		return -EFAULT;
+	linelen = strlen(line);
+	ptr = line + linelen - 1;
+	if (linelen && *ptr == '\n')
+		*ptr = '\0';
+	if (!strncmp(line, "disable=", 8)) {
+		reg = simple_strtoul(line + 8, &ptr, 0);
+		err = mtrr_del_page(reg, 0, 0);
+		if (err < 0)
+			return err;
+		return len;
+	}
+	if (strncmp(line, "base=", 5))
+		return -EINVAL;
+	base = simple_strtoull(line + 5, &ptr, 0);
+	for (; isspace(*ptr); ++ptr) ;
+	if (strncmp(ptr, "size=", 5))
+		return -EINVAL;
+	size = simple_strtoull(ptr + 5, &ptr, 0);
+	if ((base & 0xfff) || (size & 0xfff))
+		return -EINVAL;
+	for (; isspace(*ptr); ++ptr) ;
+	if (strncmp(ptr, "type=", 5))
+		return -EINVAL;
+	ptr += 5;
+	for (; isspace(*ptr); ++ptr) ;
+	for (i = 0; i < MTRR_NUM_TYPES; ++i) {
+		if (strcmp(ptr, mtrr_strings[i]))
+			continue;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+		err =
+		    mtrr_add_page((unsigned long) base, (unsigned long) size, i,
+				  1);
+		if (err < 0)
+			return err;
+		return len;
+	}
+	return -EINVAL;
+}
+
+static int
+mtrr_ioctl(struct inode *inode, struct file *file,
+	   unsigned int cmd, unsigned long __arg)
+{
+	int err;
+	mtrr_type type;
+	struct mtrr_sentry sentry;
+	struct mtrr_gentry gentry;
+	void __user *arg = (void __user *) __arg;
+
+	switch (cmd) {
+	default:
+		return -ENOTTY;
+	case MTRRIOC_ADD_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err =
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+				  file, 0);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_SET_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_DEL_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_file_del(sentry.base, sentry.size, file, 0);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_KILL_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_del(-1, sentry.base, sentry.size);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_GET_ENTRY:
+		if (copy_from_user(&gentry, arg, sizeof gentry))
+			return -EFAULT;
+		if (gentry.regnum >= num_var_ranges)
+			return -EINVAL;
+		mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
+
+		/* Hide entries that go above 4GB */
+		if (gentry.base + gentry.size > 0x100000
+		    || gentry.size == 0x100000)
+			gentry.base = gentry.size = gentry.type = 0;
+		else {
+			gentry.base <<= PAGE_SHIFT;
+			gentry.size <<= PAGE_SHIFT;
+			gentry.type = type;
+		}
+
+		if (copy_to_user(arg, &gentry, sizeof gentry))
+			return -EFAULT;
+		break;
+	case MTRRIOC_ADD_PAGE_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err =
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+				  file, 1);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_SET_PAGE_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_DEL_PAGE_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_file_del(sentry.base, sentry.size, file, 1);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_KILL_PAGE_ENTRY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		err = mtrr_del_page(-1, sentry.base, sentry.size);
+		if (err < 0)
+			return err;
+		break;
+	case MTRRIOC_GET_PAGE_ENTRY:
+		if (copy_from_user(&gentry, arg, sizeof gentry))
+			return -EFAULT;
+		if (gentry.regnum >= num_var_ranges)
+			return -EINVAL;
+		mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
+		gentry.type = type;
+
+		if (copy_to_user(arg, &gentry, sizeof gentry))
+			return -EFAULT;
+		break;
+	}
+	return 0;
+}
+
+static int
+mtrr_close(struct inode *ino, struct file *file)
+{
+	int i, max;
+	unsigned int *fcount = FILE_FCOUNT(file);
+
+	if (fcount != NULL) {
+		max = num_var_ranges;
+		for (i = 0; i < max; ++i) {
+			while (fcount[i] > 0) {
+				mtrr_del(i, 0, 0);
+				--fcount[i];
+			}
+		}
+		kfree(fcount);
+		FILE_FCOUNT(file) = NULL;
+	}
+	return single_release(ino, file);
+}
+
+static int mtrr_seq_show(struct seq_file *seq, void *offset);
+
+static int mtrr_open(struct inode *inode, struct file *file)
+{
+	if (!mtrr_if) 
+		return -EIO;
+	if (!mtrr_if->get) 
+		return -ENXIO; 
+	return single_open(file, mtrr_seq_show, NULL);
+}
+
+static struct file_operations mtrr_fops = {
+	.owner   = THIS_MODULE,
+	.open	 = mtrr_open, 
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = mtrr_write,
+	.ioctl   = mtrr_ioctl,
+	.release = mtrr_close,
+};
+
+
+static struct proc_dir_entry *proc_root_mtrr;
+
+
+static int mtrr_seq_show(struct seq_file *seq, void *offset)
+{
+	char factor;
+	int i, max, len;
+	mtrr_type type;
+	unsigned long base;
+	unsigned int size;
+
+	len = 0;
+	max = num_var_ranges;
+	for (i = 0; i < max; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (size == 0)
+			usage_table[i] = 0;
+		else {
+			if (size < (0x100000 >> PAGE_SHIFT)) {
+				/* less than 1MB */
+				factor = 'K';
+				size <<= PAGE_SHIFT - 10;
+			} else {
+				factor = 'M';
+				size >>= 20 - PAGE_SHIFT;
+			}
+			/* RED-PEN: base can be > 32bit */ 
+			len += seq_printf(seq, 
+				   "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n",
+			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
+			     mtrr_attrib_to_str(type), usage_table[i]);
+		}
+	}
+	return 0;
+}
+
+static int __init mtrr_if_init(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+	    (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+		return -ENODEV;
+
+	proc_root_mtrr =
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	if (proc_root_mtrr) {
+		proc_root_mtrr->owner = THIS_MODULE;
+		proc_root_mtrr->proc_fops = &mtrr_fops;
+	}
+	return 0;
+}
+
+arch_initcall(mtrr_if_init);
+#endif			/*  CONFIG_PROC_FS  */
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
new file mode 100644
index 00000000000..8f67b490a7f
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -0,0 +1,693 @@
+/*  Generic MTRR (Memory Type Range Register) driver.
+
+    Copyright (C) 1997-2000  Richard Gooch
+    Copyright (c) 2002	     Patrick Mochel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+    Source: "Pentium Pro Family Developer's Manual, Volume 3:
+    Operating System Writer's Guide" (Intel document number 242692),
+    section 11.11.7
+
+    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 
+    on 6-7 March 2002. 
+    Source: Intel Architecture Software Developers Manual, Volume 3: 
+    System Programming Guide; Section 9.11. (1997 edition - PPro).
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/mtrr.h>
+
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+#define MTRR_VERSION            "2.0 (20020519)"
+
+u32 num_var_ranges = 0;
+
+unsigned int *usage_table;
+static DECLARE_MUTEX(main_lock);
+
+u32 size_or_mask, size_and_mask;
+
+static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
+
+struct mtrr_ops * mtrr_if = NULL;
+
+static void set_mtrr(unsigned int reg, unsigned long base,
+		     unsigned long size, mtrr_type type);
+
+extern int arr3_protected;
+
+void set_mtrr_ops(struct mtrr_ops * ops)
+{
+	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
+		mtrr_ops[ops->vendor] = ops;
+}
+
+/*  Returns non-zero if we have the write-combining memory type  */
+static int have_wrcomb(void)
+{
+	struct pci_dev *dev;
+	
+	if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
+		/* ServerWorks LE chipsets have problems with write-combining 
+		   Don't allow it and leave room for other chipsets to be tagged */
+		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
+		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
+			printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
+		}
+		/* Intel 450NX errata # 23. Non ascending cachline evictions to
+		   write combining memory may resulting in data corruption */
+		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
+		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
+			printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
+		}
+		pci_dev_put(dev);
+	}		
+	return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
+}
+
+/*  This function returns the number of variable MTRRs  */
+static void __init set_num_var_ranges(void)
+{
+	unsigned long config = 0, dummy;
+
+	if (use_intel()) {
+		rdmsr(MTRRcap_MSR, config, dummy);
+	} else if (is_cpu(AMD))
+		config = 2;
+	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
+		config = 8;
+	num_var_ranges = config & 0xff;
+}
+
+static void __init init_table(void)
+{
+	int i, max;
+
+	max = num_var_ranges;
+	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
+	    == NULL) {
+		printk(KERN_ERR "mtrr: could not allocate\n");
+		return;
+	}
+	for (i = 0; i < max; i++)
+		usage_table[i] = 1;
+}
+
+struct set_mtrr_data {
+	atomic_t	count;
+	atomic_t	gate;
+	unsigned long	smp_base;
+	unsigned long	smp_size;
+	unsigned int	smp_reg;
+	mtrr_type	smp_type;
+};
+
+#ifdef CONFIG_SMP
+
+static void ipi_handler(void *info)
+/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
+    [RETURNS] Nothing.
+*/
+{
+	struct set_mtrr_data *data = info;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	atomic_dec(&data->count);
+	while(!atomic_read(&data->gate))
+		cpu_relax();
+
+	/*  The master has cleared me to execute  */
+	if (data->smp_reg != ~0U) 
+		mtrr_if->set(data->smp_reg, data->smp_base, 
+			     data->smp_size, data->smp_type);
+	else
+		mtrr_if->set_all();
+
+	atomic_dec(&data->count);
+	while(atomic_read(&data->gate))
+		cpu_relax();
+
+	atomic_dec(&data->count);
+	local_irq_restore(flags);
+}
+
+#endif
+
+/**
+ * set_mtrr - update mtrrs on all processors
+ * @reg:	mtrr in question
+ * @base:	mtrr base
+ * @size:	mtrr size
+ * @type:	mtrr type
+ *
+ * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
+ * 
+ * 1. Send IPI to do the following:
+ * 2. Disable Interrupts
+ * 3. Wait for all procs to do so 
+ * 4. Enter no-fill cache mode
+ * 5. Flush caches
+ * 6. Clear PGE bit
+ * 7. Flush all TLBs
+ * 8. Disable all range registers
+ * 9. Update the MTRRs
+ * 10. Enable all range registers
+ * 11. Flush all TLBs and caches again
+ * 12. Enter normal cache mode and reenable caching
+ * 13. Set PGE 
+ * 14. Wait for buddies to catch up
+ * 15. Enable interrupts.
+ * 
+ * What does that mean for us? Well, first we set data.count to the number
+ * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
+ * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
+ * Meanwhile, they are waiting for that flag to be set. Once it's set, each 
+ * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it 
+ * differently, so we call mtrr_if->set() callback and let them take care of it.
+ * When they're done, they again decrement data->count and wait for data.gate to 
+ * be reset. 
+ * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
+ * Everyone then enables interrupts and we all continue on.
+ *
+ * Note that the mechanism is the same for UP systems, too; all the SMP stuff
+ * becomes nops.
+ */
+static void set_mtrr(unsigned int reg, unsigned long base,
+		     unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data;
+	unsigned long flags;
+
+	data.smp_reg = reg;
+	data.smp_base = base;
+	data.smp_size = size;
+	data.smp_type = type;
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	atomic_set(&data.gate,0);
+
+	/*  Start the ball rolling on other CPUs  */
+	if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+		panic("mtrr: timed out waiting for other CPUs\n");
+
+	local_irq_save(flags);
+
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	/* ok, reset count and toggle gate */
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	atomic_set(&data.gate,1);
+
+	/* do our MTRR business */
+
+	/* HACK!
+	 * We use this same function to initialize the mtrrs on boot.
+	 * The state of the boot cpu's mtrrs has been saved, and we want
+	 * to replicate across all the APs. 
+	 * If we're doing that @reg is set to something special...
+	 */
+	if (reg != ~0U) 
+		mtrr_if->set(reg,base,size,type);
+
+	/* wait for the others */
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	atomic_set(&data.gate,0);
+
+	/*
+	 * Wait here for everyone to have seen the gate change
+	 * So we're the last ones to touch 'data'
+	 */
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	local_irq_restore(flags);
+}
+
+/**
+ *	mtrr_add_page - Add a memory type region
+ *	@base: Physical base address of region in pages (4 KB)
+ *	@size: Physical size of region in pages (4 KB)
+ *	@type: Type of MTRR desired
+ *	@increment: If this is true do usage counting on the region
+ *
+ *	Memory type region registers control the caching on newer Intel and
+ *	non Intel processors. This function allows drivers to request an
+ *	MTRR is added. The details and hardware specifics of each processor's
+ *	implementation are hidden from the caller, but nevertheless the 
+ *	caller should expect to need to provide a power of two size on an
+ *	equivalent power of two boundary.
+ *
+ *	If the region cannot be added either because all regions are in use
+ *	or the CPU cannot support it a negative value is returned. On success
+ *	the register number for this entry is returned, but should be treated
+ *	as a cookie only.
+ *
+ *	On a multiprocessor machine the changes are made to all processors.
+ *	This is required on x86 by the Intel processors.
+ *
+ *	The available types are
+ *
+ *	%MTRR_TYPE_UNCACHABLE	-	No caching
+ *
+ *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
+ *
+ *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
+ *
+ *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
+ *
+ *	BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *	failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add_page(unsigned long base, unsigned long size, 
+		  unsigned int type, char increment)
+{
+	int i;
+	mtrr_type ltype;
+	unsigned long lbase;
+	unsigned int lsize;
+	int error;
+
+	if (!mtrr_if)
+		return -ENXIO;
+		
+	if ((error = mtrr_if->validate_add_page(base,size,type)))
+		return error;
+
+	if (type >= MTRR_NUM_TYPES) {
+		printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
+		return -EINVAL;
+	}
+
+	/*  If the type is WC, check that this processor supports it  */
+	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
+		printk(KERN_WARNING
+		       "mtrr: your processor doesn't support write-combining\n");
+		return -ENOSYS;
+	}
+
+	if (base & size_or_mask || size & size_or_mask) {
+		printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
+		return -EINVAL;
+	}
+
+	error = -EINVAL;
+
+	/*  Search for existing MTRR  */
+	down(&main_lock);
+	for (i = 0; i < num_var_ranges; ++i) {
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (base >= lbase + lsize)
+			continue;
+		if ((base < lbase) && (base + size <= lbase))
+			continue;
+		/*  At this point we know there is some kind of overlap/enclosure  */
+		if ((base < lbase) || (base + size > lbase + lsize)) {
+			printk(KERN_WARNING
+			       "mtrr: 0x%lx000,0x%lx000 overlaps existing"
+			       " 0x%lx000,0x%x000\n", base, size, lbase,
+			       lsize);
+			goto out;
+		}
+		/*  New region is enclosed by an existing region  */
+		if (ltype != type) {
+			if (type == MTRR_TYPE_UNCACHABLE)
+				continue;
+			printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+			     base, size, mtrr_attrib_to_str(ltype),
+			     mtrr_attrib_to_str(type));
+			goto out;
+		}
+		if (increment)
+			++usage_table[i];
+		error = i;
+		goto out;
+	}
+	/*  Search for an empty MTRR  */
+	i = mtrr_if->get_free_region(base, size);
+	if (i >= 0) {
+		set_mtrr(i, base, size, type);
+		usage_table[i] = 1;
+	} else
+		printk(KERN_INFO "mtrr: no more MTRRs available\n");
+	error = i;
+ out:
+	up(&main_lock);
+	return error;
+}
+
+/**
+ *	mtrr_add - Add a memory type region
+ *	@base: Physical base address of region
+ *	@size: Physical size of region
+ *	@type: Type of MTRR desired
+ *	@increment: If this is true do usage counting on the region
+ *
+ *	Memory type region registers control the caching on newer Intel and
+ *	non Intel processors. This function allows drivers to request an
+ *	MTRR is added. The details and hardware specifics of each processor's
+ *	implementation are hidden from the caller, but nevertheless the 
+ *	caller should expect to need to provide a power of two size on an
+ *	equivalent power of two boundary.
+ *
+ *	If the region cannot be added either because all regions are in use
+ *	or the CPU cannot support it a negative value is returned. On success
+ *	the register number for this entry is returned, but should be treated
+ *	as a cookie only.
+ *
+ *	On a multiprocessor machine the changes are made to all processors.
+ *	This is required on x86 by the Intel processors.
+ *
+ *	The available types are
+ *
+ *	%MTRR_TYPE_UNCACHABLE	-	No caching
+ *
+ *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
+ *
+ *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
+ *
+ *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
+ *
+ *	BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *	failures and do not wish system log messages to be sent.
+ */
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+	 char increment)
+{
+	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+		printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n");
+		printk(KERN_DEBUG "mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+		return -EINVAL;
+	}
+	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+			     increment);
+}
+
+/**
+ *	mtrr_del_page - delete a memory type region
+ *	@reg: Register returned by mtrr_add
+ *	@base: Physical base address
+ *	@size: Size of region
+ *
+ *	If register is supplied then base and size are ignored. This is
+ *	how drivers should call it.
+ *
+ *	Releases an MTRR region. If the usage count drops to zero the 
+ *	register is freed and the region returns to default state.
+ *	On success the register is returned, on failure a negative error
+ *	code.
+ */
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase;
+	unsigned int lsize;
+	int error = -EINVAL;
+
+	if (!mtrr_if)
+		return -ENXIO;
+
+	max = num_var_ranges;
+	down(&main_lock);
+	if (reg < 0) {
+		/*  Search for existing MTRR  */
+		for (i = 0; i < max; ++i) {
+			mtrr_if->get(i, &lbase, &lsize, &ltype);
+			if (lbase == base && lsize == size) {
+				reg = i;
+				break;
+			}
+		}
+		if (reg < 0) {
+			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
+			       size);
+			goto out;
+		}
+	}
+	if (reg >= max) {
+		printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
+		goto out;
+	}
+	if (is_cpu(CYRIX) && !use_intel()) {
+		if ((reg == 3) && arr3_protected) {
+			printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
+			goto out;
+		}
+	}
+	mtrr_if->get(reg, &lbase, &lsize, &ltype);
+	if (lsize < 1) {
+		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
+		goto out;
+	}
+	if (usage_table[reg] < 1) {
+		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+		goto out;
+	}
+	if (--usage_table[reg] < 1)
+		set_mtrr(reg, 0, 0, 0);
+	error = reg;
+ out:
+	up(&main_lock);
+	return error;
+}
+/**
+ *	mtrr_del - delete a memory type region
+ *	@reg: Register returned by mtrr_add
+ *	@base: Physical base address
+ *	@size: Size of region
+ *
+ *	If register is supplied then base and size are ignored. This is
+ *	how drivers should call it.
+ *
+ *	Releases an MTRR region. If the usage count drops to zero the 
+ *	register is freed and the region returns to default state.
+ *	On success the register is returned, on failure a negative error
+ *	code.
+ */
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+		printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
+		printk(KERN_DEBUG "mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+		return -EINVAL;
+	}
+	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+/* HACK ALERT!
+ * These should be called implicitly, but we can't yet until all the initcall
+ * stuff is done...
+ */
+extern void amd_init_mtrr(void);
+extern void cyrix_init_mtrr(void);
+extern void centaur_init_mtrr(void);
+
+static void __init init_ifs(void)
+{
+	amd_init_mtrr();
+	cyrix_init_mtrr();
+	centaur_init_mtrr();
+}
+
+static void __init init_other_cpus(void)
+{
+	if (use_intel())
+		get_mtrr_state();
+
+	/* bring up the other processors */
+	set_mtrr(~0U,0,0,0);
+
+	if (use_intel()) {
+		finalize_mtrr_state();
+		mtrr_state_warn();
+	}
+}
+
+
+struct mtrr_value {
+	mtrr_type	ltype;
+	unsigned long	lbase;
+	unsigned int	lsize;
+};
+
+static struct mtrr_value * mtrr_state;
+
+static int mtrr_save(struct sys_device * sysdev, u32 state)
+{
+	int i;
+	int size = num_var_ranges * sizeof(struct mtrr_value);
+
+	mtrr_state = kmalloc(size,GFP_ATOMIC);
+	if (mtrr_state)
+		memset(mtrr_state,0,size);
+	else
+		return -ENOMEM;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i,
+			     &mtrr_state[i].lbase,
+			     &mtrr_state[i].lsize,
+			     &mtrr_state[i].ltype);
+	}
+	return 0;
+}
+
+static int mtrr_restore(struct sys_device * sysdev)
+{
+	int i;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		if (mtrr_state[i].lsize) 
+			set_mtrr(i,
+				 mtrr_state[i].lbase,
+				 mtrr_state[i].lsize,
+				 mtrr_state[i].ltype);
+	}
+	kfree(mtrr_state);
+	return 0;
+}
+
+
+
+static struct sysdev_driver mtrr_sysdev_driver = {
+	.suspend	= mtrr_save,
+	.resume		= mtrr_restore,
+};
+
+
+/**
+ * mtrr_init - initialize mtrrs on the boot CPU
+ *
+ * This needs to be called early; before any of the other CPUs are 
+ * initialized (i.e. before smp_init()).
+ * 
+ */
+static int __init mtrr_init(void)
+{
+	init_ifs();
+
+	if (cpu_has_mtrr) {
+		mtrr_if = &generic_mtrr_ops;
+		size_or_mask = 0xff000000;	/* 36 bits */
+		size_and_mask = 0x00f00000;
+			
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			/* The original Athlon docs said that
+			   total addressable memory is 44 bits wide.
+			   It was not really clear whether its MTRRs
+			   follow this or not. (Read: 44 or 36 bits).
+			   However, "x86-64_overview.pdf" explicitly
+			   states that "previous implementations support
+			   36 bit MTRRs" and also provides a way to
+			   query the width (in bits) of the physical
+			   addressable memory on the Hammer family.
+			 */
+			if (boot_cpu_data.x86 == 15
+			    && (cpuid_eax(0x80000000) >= 0x80000008)) {
+				u32 phys_addr;
+				phys_addr = cpuid_eax(0x80000008) & 0xff;
+				size_or_mask =
+				    ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+				size_and_mask = ~size_or_mask & 0xfff00000;
+			}
+			/* Athlon MTRRs use an Intel-compatible interface for 
+			 * getting and setting */
+			break;
+		case X86_VENDOR_CENTAUR:
+			if (boot_cpu_data.x86 == 6) {
+				/* VIA Cyrix family have Intel style MTRRs, but don't support PAE */
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		
+		default:
+			break;
+		}
+	} else {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (cpu_has_k6_mtrr) {
+				/* Pre-Athlon (K6) AMD CPU MTRRs */
+				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CENTAUR:
+			if (cpu_has_centaur_mcr) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CYRIX:
+			if (cpu_has_cyrix_arr) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION);
+
+	if (mtrr_if) {
+		set_num_var_ranges();
+		init_table();
+		init_other_cpus();
+
+		return sysdev_driver_register(&cpu_sysdev_class,
+					      &mtrr_sysdev_driver);
+	}
+	return -ENXIO;
+}
+
+subsys_initcall(mtrr_init);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
new file mode 100644
index 00000000000..de135124559
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -0,0 +1,98 @@
+/*
+ * local mtrr defines.
+ */
+
+#ifndef TRUE
+#define TRUE  1
+#define FALSE 0
+#endif
+
+#define MTRRcap_MSR     0x0fe
+#define MTRRdefType_MSR 0x2ff
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MTRRfix64K_00000_MSR 0x250
+#define MTRRfix16K_80000_MSR 0x258
+#define MTRRfix16K_A0000_MSR 0x259
+#define MTRRfix4K_C0000_MSR 0x268
+#define MTRRfix4K_C8000_MSR 0x269
+#define MTRRfix4K_D0000_MSR 0x26a
+#define MTRRfix4K_D8000_MSR 0x26b
+#define MTRRfix4K_E0000_MSR 0x26c
+#define MTRRfix4K_E8000_MSR 0x26d
+#define MTRRfix4K_F0000_MSR 0x26e
+#define MTRRfix4K_F8000_MSR 0x26f
+
+#define MTRR_CHANGE_MASK_FIXED     0x01
+#define MTRR_CHANGE_MASK_VARIABLE  0x02
+#define MTRR_CHANGE_MASK_DEFTYPE   0x04
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+struct mtrr_ops {
+	u32	vendor;
+	u32	use_intel_if;
+//	void	(*init)(void);
+	void	(*set)(unsigned int reg, unsigned long base,
+		       unsigned long size, mtrr_type type);
+	void	(*set_all)(void);
+
+	void	(*get)(unsigned int reg, unsigned long *base,
+		       unsigned int *size, mtrr_type * type);
+	int	(*get_free_region) (unsigned long base, unsigned long size);
+
+	int	(*validate_add_page)(unsigned long base, unsigned long size,
+				     unsigned int type);
+	int	(*have_wrcomb)(void);
+};
+
+extern int generic_get_free_region(unsigned long base, unsigned long size);
+extern int generic_validate_add_page(unsigned long base, unsigned long size,
+				     unsigned int type);
+
+extern struct mtrr_ops generic_mtrr_ops;
+
+extern int positive_have_wrcomb(void);
+
+/* library functions for processor-specific routines */
+struct set_mtrr_context {
+	unsigned long flags;
+	unsigned long deftype_lo;
+	unsigned long deftype_hi;
+	unsigned long cr4val;
+	unsigned long ccr3;
+};
+
+struct mtrr_var_range {
+	unsigned long base_lo;
+	unsigned long base_hi;
+	unsigned long mask_lo;
+	unsigned long mask_hi;
+};
+
+void set_mtrr_done(struct set_mtrr_context *ctxt);
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+
+void get_mtrr_state(void);
+
+extern void set_mtrr_ops(struct mtrr_ops * ops);
+
+extern u32 size_or_mask, size_and_mask;
+extern struct mtrr_ops * mtrr_if;
+
+#define is_cpu(vnd)	(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
+#define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
+
+extern unsigned int num_var_ranges;
+
+void finalize_mtrr_state(void);
+void mtrr_state_warn(void);
+char *mtrr_attrib_to_str(int x);
+void mtrr_wrmsr(unsigned, unsigned, unsigned);
+
diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c
new file mode 100644
index 00000000000..f62ecd15811
--- /dev/null
+++ b/arch/i386/kernel/cpu/mtrr/state.c
@@ -0,0 +1,78 @@
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+
+/*  Put the processor into a state where MTRRs can be safely set  */
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
+{
+	unsigned int cr0;
+
+	/*  Disable interrupts locally  */
+	local_irq_save(ctxt->flags);
+
+	if (use_intel() || is_cpu(CYRIX)) {
+
+		/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+		if ( cpu_has_pge ) {
+			ctxt->cr4val = read_cr4();
+			write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7));
+		}
+
+		/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
+		    a side-effect  */
+		cr0 = read_cr0() | 0x40000000;
+		wbinvd();
+		write_cr0(cr0);
+		wbinvd();
+
+		if (use_intel())
+			/*  Save MTRR state */
+			rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		else
+			/* Cyrix ARRs - everything else were excluded at the top */
+			ctxt->ccr3 = getCx86(CX86_CCR3);
+	}
+}
+
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
+{
+	if (use_intel()) 
+		/*  Disable MTRRs, and set the default type to uncached  */
+		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+		      ctxt->deftype_hi);
+	else if (is_cpu(CYRIX))
+		/* Cyrix ARRs - everything else were excluded at the top */
+		setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+}
+
+/*  Restore the processor after a set_mtrr_prepare  */
+void set_mtrr_done(struct set_mtrr_context *ctxt)
+{
+	if (use_intel() || is_cpu(CYRIX)) {
+
+		/*  Flush caches and TLBs  */
+		wbinvd();
+
+		/*  Restore MTRRdefType  */
+		if (use_intel())
+			/* Intel (P6) standard MTRRs */
+			mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		else
+			/* Cyrix ARRs - everything else was excluded at the top */
+			setCx86(CX86_CCR3, ctxt->ccr3);
+		
+		/*  Enable caches  */
+		write_cr0(read_cr0() & 0xbfffffff);
+
+		/*  Restore value of CR4  */
+		if ( cpu_has_pge )
+			write_cr4(ctxt->cr4val);
+	}
+	/*  Re-enable interrupts locally (if enabled previously)  */
+	local_irq_restore(ctxt->flags);
+}
+
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
new file mode 100644
index 00000000000..30898a260a5
--- /dev/null
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+/*
+ *	Detect a NexGen CPU running without BIOS hypercode new enough
+ *	to have CPUID. (Thanks to Herbert Oppmann)
+ */
+ 
+static int __init deep_magic_nexgen_probe(void)
+{
+	int ret;
+	
+	__asm__ __volatile__ (
+		"	movw	$0x5555, %%ax\n"
+		"	xorw	%%dx,%%dx\n"
+		"	movw	$2, %%cx\n"
+		"	divw	%%cx\n"
+		"	movl	$0, %%eax\n"
+		"	jnz	1f\n"
+		"	movl	$1, %%eax\n"
+		"1:\n" 
+		: "=a" (ret) : : "cx", "dx" );
+	return  ret;
+}
+
+static void __init init_nexgen(struct cpuinfo_x86 * c)
+{
+	c->x86_cache_size = 256; /* A few had 1 MB... */
+}
+
+static void __init nexgen_identify(struct cpuinfo_x86 * c)
+{
+	/* Detect NexGen with old hypercode */
+	if ( deep_magic_nexgen_probe() ) {
+		strcpy(c->x86_vendor_id, "NexGenDriven");
+	}
+	generic_identify(c);
+}
+
+static struct cpu_dev nexgen_cpu_dev __initdata = {
+	.c_vendor	= "Nexgen",
+	.c_ident	= { "NexGenDriven" },
+	.c_models = {
+			{ .vendor = X86_VENDOR_NEXGEN,
+			  .family = 5,
+			  .model_names = { [1] = "Nx586" }
+			},
+	},
+	.c_init		= init_nexgen,
+	.c_identify	= nexgen_identify,
+};
+
+int __init nexgen_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(nexgen_init_cpu);
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
new file mode 100644
index 00000000000..c8d83fdc237
--- /dev/null
+++ b/arch/i386/kernel/cpu/proc.c
@@ -0,0 +1,149 @@
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <linux/string.h>
+#include <asm/semaphore.h>
+#include <linux/seq_file.h>
+
+/*
+ *	Get CPU information for use by the procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	/* 
+	 * These flag bits must match the definitions in <asm/cpufeature.h>.
+	 * NULL means this bit is undefined or reserved; either way it doesn't
+	 * have meaning as far as Linux is concerned.  Note that it's important
+	 * to realize there is a difference between this table and CPUID -- if
+	 * applications want to get the raw CPUID data, they should access
+	 * /dev/cpu/<cpu_nr>/cpuid instead.
+	 */
+	static char *x86_cap_flags[] = {
+		/* Intel-defined */
+	        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+	        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+	        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+		/* AMD-defined */
+		"pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+		NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+		/* Transmeta-defined */
+		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* Other (Linux-defined) */
+		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* Intel-defined (#2) */
+		"pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+		"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* VIA/Cyrix/Centaur-defined */
+		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* AMD-defined (#2) */
+		"lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	};
+	struct cpuinfo_x86 *c = v;
+	int i, n = c - cpu_data;
+	int fpu_exception;
+
+#ifdef CONFIG_SMP
+	if (!cpu_online(n))
+		return 0;
+#endif
+	seq_printf(m, "processor\t: %d\n"
+		"vendor_id\t: %s\n"
+		"cpu family\t: %d\n"
+		"model\t\t: %d\n"
+		"model name\t: %s\n",
+		n,
+		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+		c->x86,
+		c->x86_model,
+		c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
+	if (c->x86_mask || c->cpuid_level >= 0)
+		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+	else
+		seq_printf(m, "stepping\t: unknown\n");
+
+	if ( cpu_has(c, X86_FEATURE_TSC) ) {
+		seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
+			cpu_khz / 1000, (cpu_khz % 1000));
+	}
+
+	/* Cache size */
+	if (c->x86_cache_size >= 0)
+		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+#ifdef CONFIG_X86_HT
+	seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
+	seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
+#endif
+	
+	/* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
+	fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
+	seq_printf(m, "fdiv_bug\t: %s\n"
+			"hlt_bug\t\t: %s\n"
+			"f00f_bug\t: %s\n"
+			"coma_bug\t: %s\n"
+			"fpu\t\t: %s\n"
+			"fpu_exception\t: %s\n"
+			"cpuid level\t: %d\n"
+			"wp\t\t: %s\n"
+			"flags\t\t:",
+		     c->fdiv_bug ? "yes" : "no",
+		     c->hlt_works_ok ? "no" : "yes",
+		     c->f00f_bug ? "yes" : "no",
+		     c->coma_bug ? "yes" : "no",
+		     c->hard_math ? "yes" : "no",
+		     fpu_exception ? "yes" : "no",
+		     c->cpuid_level,
+		     c->wp_works_ok ? "yes" : "no");
+
+	for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+		if ( test_bit(i, c->x86_capability) &&
+		     x86_cap_flags[i] != NULL )
+			seq_printf(m, " %s", x86_cap_flags[i]);
+
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+		     c->loops_per_jiffy/(500000/HZ),
+		     (c->loops_per_jiffy/(5000/HZ)) % 100);
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
new file mode 100644
index 00000000000..8602425628c
--- /dev/null
+++ b/arch/i386/kernel/cpu/rise.c
@@ -0,0 +1,53 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+static void __init init_rise(struct cpuinfo_x86 *c)
+{
+	printk("CPU: Rise iDragon");
+	if (c->x86_model > 2)
+		printk(" II");
+	printk("\n");
+
+	/* Unhide possibly hidden capability flags
+	   The mp6 iDragon family don't have MSRs.
+	   We switch on extra features with this cpuid weirdness: */
+	__asm__ (
+		"movl $0x6363452a, %%eax\n\t"
+		"movl $0x3231206c, %%ecx\n\t"
+		"movl $0x2a32313a, %%edx\n\t"
+		"cpuid\n\t"
+		"movl $0x63634523, %%eax\n\t"
+		"movl $0x32315f6c, %%ecx\n\t"
+		"movl $0x2333313a, %%edx\n\t"
+		"cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
+	);
+	set_bit(X86_FEATURE_CX8, c->x86_capability);
+}
+
+static struct cpu_dev rise_cpu_dev __initdata = {
+	.c_vendor	= "Rise",
+	.c_ident	= { "RiseRiseRise" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_RISE, .family = 5, .model_names = 
+		  { 
+			  [0] = "iDragon", 
+			  [2] = "iDragon", 
+			  [8] = "iDragon II", 
+			  [9] = "iDragon II"
+		  }
+		},
+	},
+	.c_init		= init_rise,
+};
+
+int __init rise_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(rise_init_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
new file mode 100644
index 00000000000..f57e5ee9494
--- /dev/null
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -0,0 +1,107 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "cpu.h"
+
+static void __init init_transmeta(struct cpuinfo_x86 *c)
+{
+	unsigned int cap_mask, uk, max, dummy;
+	unsigned int cms_rev1, cms_rev2;
+	unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev;
+	char cpu_info[65];
+
+	get_model_name(c);	/* Same as AMD/Cyrix */
+	display_cacheinfo(c);
+
+	/* Print CMS and CPU revision */
+	max = cpuid_eax(0x80860000);
+	cpu_rev = 0;
+	if ( max >= 0x80860001 ) {
+		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); 
+		if (cpu_rev != 0x02000000) {
+			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+				(cpu_rev >> 24) & 0xff,
+				(cpu_rev >> 16) & 0xff,
+				(cpu_rev >> 8) & 0xff,
+				cpu_rev & 0xff,
+				cpu_freq);
+		}
+	}
+	if ( max >= 0x80860002 ) {
+		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
+		if (cpu_rev == 0x02000000) {
+			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+				new_cpu_rev, cpu_freq);
+		}
+		printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+		       (cms_rev1 >> 24) & 0xff,
+		       (cms_rev1 >> 16) & 0xff,
+		       (cms_rev1 >> 8) & 0xff,
+		       cms_rev1 & 0xff,
+		       cms_rev2);
+	}
+	if ( max >= 0x80860006 ) {
+		cpuid(0x80860003,
+		      (void *)&cpu_info[0],
+		      (void *)&cpu_info[4],
+		      (void *)&cpu_info[8],
+		      (void *)&cpu_info[12]);
+		cpuid(0x80860004,
+		      (void *)&cpu_info[16],
+		      (void *)&cpu_info[20],
+		      (void *)&cpu_info[24],
+		      (void *)&cpu_info[28]);
+		cpuid(0x80860005,
+		      (void *)&cpu_info[32],
+		      (void *)&cpu_info[36],
+		      (void *)&cpu_info[40],
+		      (void *)&cpu_info[44]);
+		cpuid(0x80860006,
+		      (void *)&cpu_info[48],
+		      (void *)&cpu_info[52],
+		      (void *)&cpu_info[56],
+		      (void *)&cpu_info[60]);
+		cpu_info[64] = '\0';
+		printk(KERN_INFO "CPU: %s\n", cpu_info);
+	}
+
+	/* Unhide possibly hidden capability flags */
+	rdmsr(0x80860004, cap_mask, uk);
+	wrmsr(0x80860004, ~0, uk);
+	c->x86_capability[0] = cpuid_edx(0x00000001);
+	wrmsr(0x80860004, cap_mask, uk);
+	
+	/* If we can run i686 user-space code, call us an i686 */
+#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
+        if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
+		c->x86 = 6;
+}
+
+static void transmeta_identify(struct cpuinfo_x86 * c)
+{
+	u32 xlvl;
+	generic_identify(c);
+
+	/* Transmeta-defined flags: level 0x80860001 */
+	xlvl = cpuid_eax(0x80860000);
+	if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+		if (  xlvl >= 0x80860001 )
+			c->x86_capability[2] = cpuid_edx(0x80860001);
+	}
+}
+
+static struct cpu_dev transmeta_cpu_dev __initdata = {
+	.c_vendor	= "Transmeta",
+	.c_ident	= { "GenuineTMx86", "TransmetaCPU" },
+	.c_init		= init_transmeta,
+	.c_identify	= transmeta_identify,
+};
+
+int __init transmeta_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(transmeta_init_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
new file mode 100644
index 00000000000..264fcad559d
--- /dev/null
+++ b/arch/i386/kernel/cpu/umc.c
@@ -0,0 +1,33 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include "cpu.h"
+
+/* UMC chips appear to be only either 386 or 486, so no special init takes place.
+ */
+static void __init init_umc(struct cpuinfo_x86 * c)
+{
+
+}
+
+static struct cpu_dev umc_cpu_dev __initdata = {
+	.c_vendor	= "UMC",
+	.c_ident 	= { "UMC UMC UMC" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
+		  { 
+			  [1] = "U5D", 
+			  [2] = "U5S", 
+		  }
+		},
+	},
+	.c_init		= init_umc,
+};
+
+int __init umc_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
+	return 0;
+}
+
+//early_arch_initcall(umc_init_cpu);
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
new file mode 100644
index 00000000000..2e2756345bb
--- /dev/null
+++ b/arch/i386/kernel/cpuid.c
@@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------- *
+ *   
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * cpuid.c
+ *
+ * x86 CPUID access device
+ *
+ * This device is accessed by lseek() to the appropriate CPUID level
+ * and then read in chunks of 16 bytes.  A larger size means multiple
+ * reads of consecutive levels.
+ *
+ * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
+ * an SMP box will direct the access to CPU %d.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static struct class_simple *cpuid_class;
+
+#ifdef CONFIG_SMP
+
+struct cpuid_command {
+	int cpu;
+	u32 reg;
+	u32 *data;
+};
+
+static void cpuid_smp_cpuid(void *cmd_block)
+{
+	struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
+
+	if (cmd->cpu == smp_processor_id())
+		cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
+		      &cmd->data[3]);
+}
+
+static inline void do_cpuid(int cpu, u32 reg, u32 * data)
+{
+	struct cpuid_command cmd;
+
+	preempt_disable();
+	if (cpu == smp_processor_id()) {
+		cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
+	} else {
+		cmd.cpu = cpu;
+		cmd.reg = reg;
+		cmd.data = data;
+
+		smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1);
+	}
+	preempt_enable();
+}
+#else				/* ! CONFIG_SMP */
+
+static inline void do_cpuid(int cpu, u32 reg, u32 * data)
+{
+	cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
+}
+
+#endif				/* ! CONFIG_SMP */
+
+static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
+{
+	loff_t ret;
+
+	lock_kernel();
+
+	switch (orig) {
+	case 0:
+		file->f_pos = offset;
+		ret = file->f_pos;
+		break;
+	case 1:
+		file->f_pos += offset;
+		ret = file->f_pos;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	unlock_kernel();
+	return ret;
+}
+
+static ssize_t cpuid_read(struct file *file, char __user *buf,
+			  size_t count, loff_t * ppos)
+{
+	char __user *tmp = buf;
+	u32 data[4];
+	size_t rv;
+	u32 reg = *ppos;
+	int cpu = iminor(file->f_dentry->d_inode);
+
+	if (count % 16)
+		return -EINVAL;	/* Invalid chunk size */
+
+	for (rv = 0; count; count -= 16) {
+		do_cpuid(cpu, reg, data);
+		if (copy_to_user(tmp, &data, 16))
+			return -EFAULT;
+		tmp += 16;
+		*ppos = reg++;
+	}
+
+	return tmp - buf;
+}
+
+static int cpuid_open(struct inode *inode, struct file *file)
+{
+	unsigned int cpu = iminor(file->f_dentry->d_inode);
+	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+
+	if (cpu >= NR_CPUS || !cpu_online(cpu))
+		return -ENXIO;	/* No such CPU */
+	if (c->cpuid_level < 0)
+		return -EIO;	/* CPUID not supported */
+
+	return 0;
+}
+
+/*
+ * File operations we support
+ */
+static struct file_operations cpuid_fops = {
+	.owner = THIS_MODULE,
+	.llseek = cpuid_seek,
+	.read = cpuid_read,
+	.open = cpuid_open,
+};
+
+static int cpuid_class_simple_device_add(int i) 
+{
+	int err = 0;
+	struct class_device *class_err;
+
+	class_err = class_simple_device_add(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i);
+	if (IS_ERR(class_err))
+		err = PTR_ERR(class_err);
+	return err;
+}
+
+static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		cpuid_class_simple_device_add(cpu);
+		break;
+	case CPU_DEAD:
+		class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu));
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuid_class_cpu_notifier =
+{
+	.notifier_call = cpuid_class_cpu_callback,
+};
+
+static int __init cpuid_init(void)
+{
+	int i, err = 0;
+	i = 0;
+
+	if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
+		printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
+		       CPUID_MAJOR);
+		err = -EBUSY;
+		goto out;
+	}
+	cpuid_class = class_simple_create(THIS_MODULE, "cpuid");
+	if (IS_ERR(cpuid_class)) {
+		err = PTR_ERR(cpuid_class);
+		goto out_chrdev;
+	}
+	for_each_online_cpu(i) {
+		err = cpuid_class_simple_device_add(i);
+		if (err != 0) 
+			goto out_class;
+	}
+	register_cpu_notifier(&cpuid_class_cpu_notifier);
+
+	err = 0;
+	goto out;
+
+out_class:
+	i = 0;
+	for_each_online_cpu(i) {
+		class_simple_device_remove(MKDEV(CPUID_MAJOR, i));
+	}
+	class_simple_destroy(cpuid_class);
+out_chrdev:
+	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");	
+out:
+	return err;
+}
+
+static void __exit cpuid_exit(void)
+{
+	int cpu = 0;
+
+	for_each_online_cpu(cpu)
+		class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu));
+	class_simple_destroy(cpuid_class);
+	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+	unregister_cpu_notifier(&cpuid_class_cpu_notifier);
+}
+
+module_init(cpuid_init);
+module_exit(cpuid_exit);
+
+MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
+MODULE_DESCRIPTION("x86 generic CPUID driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
new file mode 100644
index 00000000000..6ed7e28f306
--- /dev/null
+++ b/arch/i386/kernel/dmi_scan.c
@@ -0,0 +1,487 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+#include <linux/pm.h>
+#include <asm/system.h>
+#include <linux/dmi.h>
+#include <linux/bootmem.h>
+
+
+struct dmi_header
+{
+	u8	type;
+	u8	length;
+	u16	handle;
+};
+
+#undef DMI_DEBUG
+
+#ifdef DMI_DEBUG
+#define dmi_printk(x) printk x
+#else
+#define dmi_printk(x)
+#endif
+
+static char * __init dmi_string(struct dmi_header *dm, u8 s)
+{
+	u8 *bp=(u8 *)dm;
+	bp+=dm->length;
+	if(!s)
+		return "";
+	s--;
+	while(s>0 && *bp)
+	{
+		bp+=strlen(bp);
+		bp++;
+		s--;
+	}
+	return bp;
+}
+
+/*
+ *	We have to be cautious here. We have seen BIOSes with DMI pointers
+ *	pointing to completely the wrong place for example
+ */
+ 
+static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
+{
+	u8 *buf;
+	struct dmi_header *dm;
+	u8 *data;
+	int i=0;
+		
+	buf = bt_ioremap(base, len);
+	if(buf==NULL)
+		return -1;
+
+	data = buf;
+
+	/*
+ 	 *	Stop when we see all the items the table claimed to have
+ 	 *	OR we run off the end of the table (also happens)
+ 	 */
+ 
+	while(i<num && data-buf+sizeof(struct dmi_header)<=len)
+	{
+		dm=(struct dmi_header *)data;
+		/*
+		 *  We want to know the total length (formated area and strings)
+		 *  before decoding to make sure we won't run off the table in
+		 *  dmi_decode or dmi_string
+		 */
+		data+=dm->length;
+		while(data-buf<len-1 && (data[0] || data[1]))
+			data++;
+		if(data-buf<len-1)
+			decode(dm);
+		data+=2;
+		i++;
+	}
+	bt_iounmap(buf, len);
+	return 0;
+}
+
+
+inline static int __init dmi_checksum(u8 *buf)
+{
+	u8 sum=0;
+	int a;
+	
+	for(a=0; a<15; a++)
+		sum+=buf[a];
+	return (sum==0);
+}
+
+static int __init dmi_iterate(void (*decode)(struct dmi_header *))
+{
+	u8 buf[15];
+	char __iomem *p, *q;
+
+	/*
+	 * no iounmap() for that ioremap(); it would be a no-op, but it's
+	 * so early in setup that sucker gets confused into doing what
+	 * it shouldn't if we actually call it.
+	 */
+	p = ioremap(0xF0000, 0x10000);
+	if (p == NULL)
+		return -1;
+	for (q = p; q < p + 0x10000; q += 16) {
+		memcpy_fromio(buf, q, 15);
+		if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
+		{
+			u16 num=buf[13]<<8|buf[12];
+			u16 len=buf[7]<<8|buf[6];
+			u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
+
+			/*
+			 * DMI version 0.0 means that the real version is taken from
+			 * the SMBIOS version, which we don't know at this point.
+			 */
+			if(buf[14]!=0)
+				printk(KERN_INFO "DMI %d.%d present.\n",
+					buf[14]>>4, buf[14]&0x0F);
+			else
+				printk(KERN_INFO "DMI present.\n");
+			dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
+				num, len));
+			dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
+				base));
+			if(dmi_table(base,len, num, decode)==0)
+				return 0;
+		}
+	}
+	return -1;
+}
+
+static char *dmi_ident[DMI_STRING_MAX];
+
+/*
+ *	Save a DMI string
+ */
+ 
+static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
+{
+	char *d = (char*)dm;
+	char *p = dmi_string(dm, d[string]);
+	if(p==NULL || *p == 0)
+		return;
+	if (dmi_ident[slot])
+		return;
+	dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
+	if(dmi_ident[slot])
+		strcpy(dmi_ident[slot], p);
+	else
+		printk(KERN_ERR "dmi_save_ident: out of memory.\n");
+}
+
+/*
+ * Ugly compatibility crap.
+ */
+#define dmi_blacklist	dmi_system_id
+#define NO_MATCH	{ DMI_NONE, NULL}
+#define MATCH		DMI_MATCH
+
+/*
+ * Toshiba keyboard likes to repeat keys when they are not repeated.
+ */
+
+static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
+{
+	printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
+	return 0;
+}
+
+
+#ifdef CONFIG_ACPI_SLEEP
+static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
+{
+	/* See acpi_wakeup.S */
+	extern long acpi_video_flags;
+	acpi_video_flags |= 2;
+	return 0;
+}
+#endif
+
+
+#ifdef	CONFIG_ACPI_BOOT
+extern int acpi_force;
+
+static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) 
+{ 
+	if (!acpi_force) { 
+		printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); 
+		disable_acpi();
+	} else { 
+		printk(KERN_NOTICE 
+		       "Warning: DMI blacklist says broken, but acpi forced\n"); 
+	}
+	return 0;
+} 
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) 
+{ 
+	if (!acpi_force) { 
+		printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); 
+		disable_acpi();
+		acpi_ht = 1; 
+	} else { 
+		printk(KERN_NOTICE 
+		       "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); 
+	}
+	return 0;
+} 
+#endif
+
+#ifdef	CONFIG_ACPI_PCI
+static __init int disable_acpi_irq(struct dmi_blacklist *d) 
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+		       d->ident); 	
+		acpi_noirq_set();
+	}
+	return 0;
+}
+static __init int disable_acpi_pci(struct dmi_blacklist *d) 
+{
+	if (!acpi_force) {
+		printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+		       d->ident); 	
+		acpi_disable_pci();
+	}
+	return 0;
+}  
+#endif
+
+/*
+ *	Process the DMI blacklists
+ */
+ 
+
+/*
+ *	This will be expanded over time to force things like the APM 
+ *	interrupt mask settings according to the laptop
+ */
+ 
+static __initdata struct dmi_blacklist dmi_blacklist[]={
+
+	{ broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
+			MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+			NO_MATCH, NO_MATCH, NO_MATCH
+			} },
+#ifdef CONFIG_ACPI_SLEEP
+	{ reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
+			MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+			NO_MATCH, NO_MATCH, NO_MATCH
+			} },
+#endif
+
+#ifdef	CONFIG_ACPI_BOOT
+	/*
+	 * If your system is blacklisted here, but you find that acpi=force
+	 * works for you, please contact acpi-devel@sourceforge.net
+	 */
+
+	/*
+	 *	Boxes that need ACPI disabled
+	 */
+
+	{ dmi_disable_acpi, "IBM Thinkpad", {
+			MATCH(DMI_BOARD_VENDOR, "IBM"),
+			MATCH(DMI_BOARD_NAME, "2629H1G"),
+			NO_MATCH, NO_MATCH }},
+
+	/*
+	 *	Boxes that need acpi=ht 
+	 */
+
+	{ force_acpi_ht, "FSC Primergy T850", {
+			MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+			MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "DELL GX240", {
+			MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+			MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "HP VISUALIZE NT Workstation", {
+			MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+			MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "Compaq Workstation W8000", {
+			MATCH(DMI_SYS_VENDOR, "Compaq"),
+			MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "ASUS P4B266", {
+			MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			MATCH(DMI_BOARD_NAME, "P4B266"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "ASUS P2B-DS", {
+			MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			MATCH(DMI_BOARD_NAME, "P2B-DS"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "ASUS CUR-DLS", {
+			MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "ABIT i440BX-W83977", {
+			MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+			MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "IBM Bladecenter", {
+			MATCH(DMI_BOARD_VENDOR, "IBM"),
+			MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "IBM eServer xSeries 360", {
+			MATCH(DMI_BOARD_VENDOR, "IBM"),
+			MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "IBM eserver xSeries 330", {
+			MATCH(DMI_BOARD_VENDOR, "IBM"),
+			MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+			NO_MATCH, NO_MATCH }},
+
+	{ force_acpi_ht, "IBM eserver xSeries 440", {
+			MATCH(DMI_BOARD_VENDOR, "IBM"),
+			MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+			NO_MATCH, NO_MATCH }},
+
+#endif	// CONFIG_ACPI_BOOT
+
+#ifdef	CONFIG_ACPI_PCI
+	/*
+	 *	Boxes that need ACPI PCI IRQ routing disabled
+	 */
+
+	{ disable_acpi_irq, "ASUS A7V", {
+			MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+			MATCH(DMI_BOARD_NAME, "<A7V>"),
+			/* newer BIOS, Revision 1011, does work */
+			MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+			NO_MATCH }},
+
+	/*
+	 *	Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+	 */
+	{ disable_acpi_pci, "ASUS PR-DLS", {	/* _BBN 0 bug */
+			MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			MATCH(DMI_BOARD_NAME, "PR-DLS"),
+			MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+			MATCH(DMI_BIOS_DATE, "03/21/2003") }},
+
+ 	{ disable_acpi_pci, "Acer TravelMate 36x Laptop", {
+ 			MATCH(DMI_SYS_VENDOR, "Acer"),
+ 			MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ 			NO_MATCH, NO_MATCH
+ 			} },
+
+#endif
+
+	{ NULL, }
+};
+
+/*
+ *	Process a DMI table entry. Right now all we care about are the BIOS
+ *	and machine entries. For 2.5 we should pull the smbus controller info
+ *	out of here.
+ */
+
+static void __init dmi_decode(struct dmi_header *dm)
+{
+#ifdef DMI_DEBUG
+	u8 *data = (u8 *)dm;
+#endif
+	
+	switch(dm->type)
+	{
+		case  0:
+			dmi_printk(("BIOS Vendor: %s\n",
+				dmi_string(dm, data[4])));
+			dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
+			dmi_printk(("BIOS Version: %s\n", 
+				dmi_string(dm, data[5])));
+			dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
+			dmi_printk(("BIOS Release: %s\n",
+				dmi_string(dm, data[8])));
+			dmi_save_ident(dm, DMI_BIOS_DATE, 8);
+			break;
+		case 1:
+			dmi_printk(("System Vendor: %s\n",
+				dmi_string(dm, data[4])));
+			dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
+			dmi_printk(("Product Name: %s\n",
+				dmi_string(dm, data[5])));
+			dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
+			dmi_printk(("Version: %s\n",
+				dmi_string(dm, data[6])));
+			dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
+			dmi_printk(("Serial Number: %s\n",
+				dmi_string(dm, data[7])));
+			break;
+		case 2:
+			dmi_printk(("Board Vendor: %s\n",
+				dmi_string(dm, data[4])));
+			dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
+			dmi_printk(("Board Name: %s\n",
+				dmi_string(dm, data[5])));
+			dmi_save_ident(dm, DMI_BOARD_NAME, 5);
+			dmi_printk(("Board Version: %s\n",
+				dmi_string(dm, data[6])));
+			dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
+			break;
+	}
+}
+
+void __init dmi_scan_machine(void)
+{
+	int err = dmi_iterate(dmi_decode);
+	if(err == 0)
+ 		dmi_check_system(dmi_blacklist);
+	else
+		printk(KERN_INFO "DMI not present.\n");
+}
+
+
+/**
+ *	dmi_check_system - check system DMI data
+ *	@list: array of dmi_system_id structures to match against
+ *
+ *	Walk the blacklist table running matching functions until someone
+ *	returns non zero or we hit the end. Callback function is called for
+ *	each successfull match. Returns the number of matches.
+ */
+int dmi_check_system(struct dmi_system_id *list)
+{
+	int i, count = 0;
+	struct dmi_system_id *d = list;
+
+	while (d->ident) {
+		for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
+			int s = d->matches[i].slot;
+			if (s == DMI_NONE)
+				continue;
+			if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
+				continue;
+			/* No match */
+			goto fail;
+		}
+		if (d->callback && d->callback(d))
+			break;
+		count++;
+fail:		d++;
+	}
+
+	return count;
+}
+
+EXPORT_SYMBOL(dmi_check_system);
+
+/**
+ *	dmi_get_system_info - return DMI data value
+ *	@field: data index (see enum dmi_filed)
+ *
+ *	Returns one DMI data value, can be used to perform
+ *	complex DMI data checks.
+ */
+char * dmi_get_system_info(int field)
+{
+	return dmi_ident[field];
+}
+
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
new file mode 100644
index 00000000000..789af3e9fb1
--- /dev/null
+++ b/arch/i386/kernel/doublefault.c
@@ -0,0 +1,65 @@
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+
+#define DOUBLEFAULT_STACKSIZE (1024)
+static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
+#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
+
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+
+static void doublefault_fn(void)
+{
+	struct Xgt_desc_struct gdt_desc = {0, 0};
+	unsigned long gdt, tss;
+
+	__asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
+	gdt = gdt_desc.address;
+
+	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+
+	if (ptr_ok(gdt)) {
+		gdt += GDT_ENTRY_TSS << 3;
+		tss = *(u16 *)(gdt+2);
+		tss += *(u8 *)(gdt+4) << 16;
+		tss += *(u8 *)(gdt+7) << 24;
+		printk("double fault, tss at %08lx\n", tss);
+
+		if (ptr_ok(tss)) {
+			struct tss_struct *t = (struct tss_struct *)tss;
+
+			printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+
+			printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
+				t->eax, t->ebx, t->ecx, t->edx);
+			printk("esi = %08lx, edi = %08lx\n",
+				t->esi, t->edi);
+		}
+	}
+
+	for (;;) /* nothing */;
+}
+
+struct tss_struct doublefault_tss __cacheline_aligned = {
+	.esp0		= STACK_START,
+	.ss0		= __KERNEL_DS,
+	.ldt		= 0,
+	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
+
+	.eip		= (unsigned long) doublefault_fn,
+	.eflags		= X86_EFLAGS_SF | 0x2,	/* 0x2 bit is always set */
+	.esp		= STACK_START,
+	.es		= __USER_DS,
+	.cs		= __KERNEL_CS,
+	.ss		= __KERNEL_DS,
+	.ds		= __USER_DS,
+
+	.__cr3		= __pa(swapper_pg_dir)
+};
diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c
new file mode 100644
index 00000000000..92f812ba275
--- /dev/null
+++ b/arch/i386/kernel/early_printk.c
@@ -0,0 +1,2 @@
+
+#include "../../x86_64/kernel/early_printk.c"
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
new file mode 100644
index 00000000000..9e5e0d8bd36
--- /dev/null
+++ b/arch/i386/kernel/efi.c
@@ -0,0 +1,635 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version.  --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls.  --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ *	Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/tlbflush.h>
+
+#define EFI_DEBUG	0
+#define PFX 		"EFI: "
+
+extern efi_status_t asmlinkage efi_call_phys(void *, ...);
+
+struct efi efi;
+EXPORT_SYMBOL(efi);
+static struct efi efi_phys __initdata;
+struct efi_memory_map memmap __initdata;
+
+/*
+ * We require an early boot_ioremap mapping mechanism initially
+ */
+extern void * boot_ioremap(unsigned long, unsigned long);
+
+/*
+ * To make EFI call EFI runtime service in physical addressing mode we need
+ * prelog/epilog before/after the invocation to disable interrupt, to
+ * claim EFI runtime service handler exclusively and to duplicate a memory in
+ * low memory space say 0 - 3G.
+ */
+
+static unsigned long efi_rt_eflags;
+static DEFINE_SPINLOCK(efi_rt_lock);
+static pgd_t efi_bak_pg_dir_pointer[2];
+
+static void efi_call_phys_prelog(void)
+{
+	unsigned long cr4;
+	unsigned long temp;
+
+	spin_lock(&efi_rt_lock);
+	local_irq_save(efi_rt_eflags);
+
+	/*
+	 * If I don't have PSE, I should just duplicate two entries in page
+	 * directory. If I have PSE, I just need to duplicate one entry in
+	 * page directory.
+	 */
+	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+
+	if (cr4 & X86_CR4_PSE) {
+		efi_bak_pg_dir_pointer[0].pgd =
+		    swapper_pg_dir[pgd_index(0)].pgd;
+		swapper_pg_dir[0].pgd =
+		    swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
+	} else {
+		efi_bak_pg_dir_pointer[0].pgd =
+		    swapper_pg_dir[pgd_index(0)].pgd;
+		efi_bak_pg_dir_pointer[1].pgd =
+		    swapper_pg_dir[pgd_index(0x400000)].pgd;
+		swapper_pg_dir[pgd_index(0)].pgd =
+		    swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
+		temp = PAGE_OFFSET + 0x400000;
+		swapper_pg_dir[pgd_index(0x400000)].pgd =
+		    swapper_pg_dir[pgd_index(temp)].pgd;
+	}
+
+	/*
+	 * After the lock is released, the original page table is restored.
+	 */
+	local_flush_tlb();
+
+	cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
+	__asm__ __volatile__("lgdt %0":"=m"
+			    (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+}
+
+static void efi_call_phys_epilog(void)
+{
+	unsigned long cr4;
+
+	cpu_gdt_descr[0].address =
+		(unsigned long) __va(cpu_gdt_descr[0].address);
+	__asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
+	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+
+	if (cr4 & X86_CR4_PSE) {
+		swapper_pg_dir[pgd_index(0)].pgd =
+		    efi_bak_pg_dir_pointer[0].pgd;
+	} else {
+		swapper_pg_dir[pgd_index(0)].pgd =
+		    efi_bak_pg_dir_pointer[0].pgd;
+		swapper_pg_dir[pgd_index(0x400000)].pgd =
+		    efi_bak_pg_dir_pointer[1].pgd;
+	}
+
+	/*
+	 * After the lock is released, the original page table is restored.
+	 */
+	local_flush_tlb();
+
+	local_irq_restore(efi_rt_eflags);
+	spin_unlock(&efi_rt_lock);
+}
+
+static efi_status_t
+phys_efi_set_virtual_address_map(unsigned long memory_map_size,
+				 unsigned long descriptor_size,
+				 u32 descriptor_version,
+				 efi_memory_desc_t *virtual_map)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog();
+	status = efi_call_phys(efi_phys.set_virtual_address_map,
+				     memory_map_size, descriptor_size,
+				     descriptor_version, virtual_map);
+	efi_call_phys_epilog();
+	return status;
+}
+
+static efi_status_t
+phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog();
+	status = efi_call_phys(efi_phys.get_time, tm, tc);
+	efi_call_phys_epilog();
+	return status;
+}
+
+inline int efi_set_rtc_mmss(unsigned long nowtime)
+{
+	int real_seconds, real_minutes;
+	efi_status_t 	status;
+	efi_time_t 	eft;
+	efi_time_cap_t 	cap;
+
+	spin_lock(&efi_rt_lock);
+	status = efi.get_time(&eft, &cap);
+	spin_unlock(&efi_rt_lock);
+	if (status != EFI_SUCCESS)
+		panic("Ooops, efitime: can't read time!\n");
+	real_seconds = nowtime % 60;
+	real_minutes = nowtime / 60;
+
+	if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
+		real_minutes += 30;
+	real_minutes %= 60;
+
+	eft.minute = real_minutes;
+	eft.second = real_seconds;
+
+	if (status != EFI_SUCCESS) {
+		printk("Ooops: efitime: can't read time!\n");
+		return -1;
+	}
+	return 0;
+}
+/*
+ * This should only be used during kernel init and before runtime
+ * services have been remapped, therefore, we'll need to call in physical
+ * mode.  Note, this call isn't used later, so mark it __init.
+ */
+inline unsigned long __init efi_get_time(void)
+{
+	efi_status_t status;
+	efi_time_t eft;
+	efi_time_cap_t cap;
+
+	status = phys_efi_get_time(&eft, &cap);
+	if (status != EFI_SUCCESS)
+		printk("Oops: efitime: can't read time status: 0x%lx\n",status);
+
+	return mktime(eft.year, eft.month, eft.day, eft.hour,
+			eft.minute, eft.second);
+}
+
+int is_available_memory(efi_memory_desc_t * md)
+{
+	if (!(md->attribute & EFI_MEMORY_WB))
+		return 0;
+
+	switch (md->type) {
+		case EFI_LOADER_CODE:
+		case EFI_LOADER_DATA:
+		case EFI_BOOT_SERVICES_CODE:
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_CONVENTIONAL_MEMORY:
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * We need to map the EFI memory map again after paging_init().
+ */
+void __init efi_map_memmap(void)
+{
+	memmap.map = NULL;
+
+	memmap.map = (efi_memory_desc_t *)
+		bt_ioremap((unsigned long) memmap.phys_map,
+			(memmap.nr_map * sizeof(efi_memory_desc_t)));
+
+	if (memmap.map == NULL)
+		printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+}
+
+#if EFI_DEBUG
+static void __init print_efi_memmap(void)
+{
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+		printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
+			"range=[0x%016llx-0x%016llx) (%lluMB)\n",
+			i, md->type, md->attribute, md->phys_addr,
+			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+	}
+}
+#endif  /*  EFI_DEBUG  */
+
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI
+ * memory descriptor that has memory that is available for kernel use.
+ */
+void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
+{
+	int prev_valid = 0;
+	struct range {
+		unsigned long start;
+		unsigned long end;
+	} prev, curr;
+	efi_memory_desc_t *md;
+	unsigned long start, end;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+
+		if ((md->num_pages == 0) || (!is_available_memory(md)))
+			continue;
+
+		curr.start = md->phys_addr;
+		curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
+
+		if (!prev_valid) {
+			prev = curr;
+			prev_valid = 1;
+		} else {
+			if (curr.start < prev.start)
+				printk(KERN_INFO PFX "Unordered memory map\n");
+			if (prev.end == curr.start)
+				prev.end = curr.end;
+			else {
+				start =
+				    (unsigned long) (PAGE_ALIGN(prev.start));
+				end = (unsigned long) (prev.end & PAGE_MASK);
+				if ((end > start)
+				    && (*callback) (start, end, arg) < 0)
+					return;
+				prev = curr;
+			}
+		}
+	}
+	if (prev_valid) {
+		start = (unsigned long) PAGE_ALIGN(prev.start);
+		end = (unsigned long) (prev.end & PAGE_MASK);
+		if (end > start)
+			(*callback) (start, end, arg);
+	}
+}
+
+void __init efi_init(void)
+{
+	efi_config_table_t *config_tables;
+	efi_runtime_services_t *runtime;
+	efi_char16_t *c16;
+	char vendor[100] = "unknown";
+	unsigned long num_config_tables;
+	int i = 0;
+
+	memset(&efi, 0, sizeof(efi) );
+	memset(&efi_phys, 0, sizeof(efi_phys));
+
+	efi_phys.systab = EFI_SYSTAB;
+	memmap.phys_map = EFI_MEMMAP;
+	memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
+	memmap.desc_version = EFI_MEMDESC_VERSION;
+
+	efi.systab = (efi_system_table_t *)
+		boot_ioremap((unsigned long) efi_phys.systab,
+			sizeof(efi_system_table_t));
+	/*
+	 * Verify the EFI Table
+	 */
+	if (efi.systab == NULL)
+		printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
+	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
+	if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
+		printk(KERN_ERR PFX
+		       "Warning: EFI system table major version mismatch: "
+		       "got %d.%02d, expected %d.%02d\n",
+		       efi.systab->hdr.revision >> 16,
+		       efi.systab->hdr.revision & 0xffff,
+		       EFI_SYSTEM_TABLE_REVISION >> 16,
+		       EFI_SYSTEM_TABLE_REVISION & 0xffff);
+	/*
+	 * Grab some details from the system table
+	 */
+	num_config_tables = efi.systab->nr_tables;
+	config_tables = (efi_config_table_t *)efi.systab->tables;
+	runtime = efi.systab->runtime;
+
+	/*
+	 * Show what we know for posterity
+	 */
+	c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
+	if (c16) {
+		for (i = 0; i < sizeof(vendor) && *c16; ++i)
+			vendor[i] = *c16++;
+		vendor[i] = '\0';
+	} else
+		printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
+
+	printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n",
+	       efi.systab->hdr.revision >> 16,
+	       efi.systab->hdr.revision & 0xffff, vendor);
+
+	/*
+	 * Let's see what config tables the firmware passed to us.
+	 */
+	config_tables = (efi_config_table_t *)
+				boot_ioremap((unsigned long) config_tables,
+			        num_config_tables * sizeof(efi_config_table_t));
+
+	if (config_tables == NULL)
+		printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
+
+	for (i = 0; i < num_config_tables; i++) {
+		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+			efi.mps = (void *)config_tables[i].table;
+			printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
+		} else
+		    if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
+			efi.acpi20 = __va(config_tables[i].table);
+			printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
+		} else
+		    if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+			efi.acpi = __va(config_tables[i].table);
+			printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
+		} else
+		    if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+			efi.smbios = (void *) config_tables[i].table;
+			printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
+		} else
+		    if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
+			efi.hcdp = (void *)config_tables[i].table;
+			printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
+		} else
+		    if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
+			efi.uga = (void *)config_tables[i].table;
+			printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
+		}
+	}
+	printk("\n");
+
+	/*
+	 * Check out the runtime services table. We need to map
+	 * the runtime services table so that we can grab the physical
+	 * address of several of the EFI runtime functions, needed to
+	 * set the firmware into virtual mode.
+	 */
+
+	runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
+						runtime,
+				      		sizeof(efi_runtime_services_t));
+	if (runtime != NULL) {
+		/*
+	 	 * We will only need *early* access to the following
+		 * two EFI runtime services before set_virtual_address_map
+		 * is invoked.
+ 	 	 */
+		efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
+		efi_phys.set_virtual_address_map =
+			(efi_set_virtual_address_map_t *)
+				runtime->set_virtual_address_map;
+	} else
+		printk(KERN_ERR PFX "Could not map the runtime service table!\n");
+
+	/* Map the EFI memory map for use until paging_init() */
+
+	memmap.map = (efi_memory_desc_t *)
+		boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
+
+	if (memmap.map == NULL)
+		printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
+
+	if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
+		printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
+			   "match the one from EFI!\n");
+	}
+#if EFI_DEBUG
+	print_efi_memmap();
+#endif
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor and update
+ * that memory descriptor with the virtual address obtained from ioremap().
+ * This enables the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ */
+
+void __init efi_enter_virtual_mode(void)
+{
+	efi_memory_desc_t *md;
+	efi_status_t status;
+	int i;
+
+	efi.systab = NULL;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+
+		if (md->attribute & EFI_MEMORY_RUNTIME) {
+			md->virt_addr =
+				(unsigned long)ioremap(md->phys_addr,
+					md->num_pages << EFI_PAGE_SHIFT);
+			if (!(unsigned long)md->virt_addr) {
+				printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
+					(unsigned long)md->phys_addr);
+			}
+
+			if (((unsigned long)md->phys_addr <=
+					(unsigned long)efi_phys.systab) &&
+				((unsigned long)efi_phys.systab <
+					md->phys_addr +
+					((unsigned long)md->num_pages <<
+						EFI_PAGE_SHIFT))) {
+				unsigned long addr;
+
+				addr = md->virt_addr - md->phys_addr +
+						(unsigned long)efi_phys.systab;
+				efi.systab = (efi_system_table_t *)addr;
+			}
+		}
+	}
+
+	if (!efi.systab)
+		BUG();
+
+	status = phys_efi_set_virtual_address_map(
+			sizeof(efi_memory_desc_t) * memmap.nr_map,
+			sizeof(efi_memory_desc_t),
+			memmap.desc_version,
+		       	memmap.phys_map);
+
+	if (status != EFI_SUCCESS) {
+		printk (KERN_ALERT "You are screwed! "
+			"Unable to switch EFI into virtual mode "
+			"(status=%lx)\n", status);
+		panic("EFI call to SetVirtualAddressMap() failed!");
+	}
+
+	/*
+	 * Now that EFI is in virtual mode, update the function
+	 * pointers in the runtime service table to the new virtual addresses.
+	 */
+
+	efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
+	efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
+	efi.get_wakeup_time = (efi_get_wakeup_time_t *)
+					efi.systab->runtime->get_wakeup_time;
+	efi.set_wakeup_time = (efi_set_wakeup_time_t *)
+					efi.systab->runtime->set_wakeup_time;
+	efi.get_variable = (efi_get_variable_t *)
+					efi.systab->runtime->get_variable;
+	efi.get_next_variable = (efi_get_next_variable_t *)
+					efi.systab->runtime->get_next_variable;
+	efi.set_variable = (efi_set_variable_t *)
+					efi.systab->runtime->set_variable;
+	efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
+					efi.systab->runtime->get_next_high_mono_count;
+	efi.reset_system = (efi_reset_system_t *)
+					efi.systab->runtime->reset_system;
+}
+
+void __init
+efi_initialize_iomem_resources(struct resource *code_resource,
+			       struct resource *data_resource)
+{
+	struct resource *res;
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+
+		if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
+		    0x100000000ULL)
+			continue;
+		res = alloc_bootmem_low(sizeof(struct resource));
+		switch (md->type) {
+		case EFI_RESERVED_TYPE:
+			res->name = "Reserved Memory";
+			break;
+		case EFI_LOADER_CODE:
+			res->name = "Loader Code";
+			break;
+		case EFI_LOADER_DATA:
+			res->name = "Loader Data";
+			break;
+		case EFI_BOOT_SERVICES_DATA:
+			res->name = "BootServices Data";
+			break;
+		case EFI_BOOT_SERVICES_CODE:
+			res->name = "BootServices Code";
+			break;
+		case EFI_RUNTIME_SERVICES_CODE:
+			res->name = "Runtime Service Code";
+			break;
+		case EFI_RUNTIME_SERVICES_DATA:
+			res->name = "Runtime Service Data";
+			break;
+		case EFI_CONVENTIONAL_MEMORY:
+			res->name = "Conventional Memory";
+			break;
+		case EFI_UNUSABLE_MEMORY:
+			res->name = "Unusable Memory";
+			break;
+		case EFI_ACPI_RECLAIM_MEMORY:
+			res->name = "ACPI Reclaim";
+			break;
+		case EFI_ACPI_MEMORY_NVS:
+			res->name = "ACPI NVS";
+			break;
+		case EFI_MEMORY_MAPPED_IO:
+			res->name = "Memory Mapped IO";
+			break;
+		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+			res->name = "Memory Mapped IO Port Space";
+			break;
+		default:
+			res->name = "Reserved";
+			break;
+		}
+		res->start = md->phys_addr;
+		res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		if (request_resource(&iomem_resource, res) < 0)
+			printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
+				res->name, res->start, res->end);
+		/*
+		 * We don't know which region contains kernel data so we try
+		 * it repeatedly and let the resource manager test it.
+		 */
+		if (md->type == EFI_CONVENTIONAL_MEMORY) {
+			request_resource(res, code_resource);
+			request_resource(res, data_resource);
+		}
+	}
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+
+u32 efi_mem_type(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+		if ((md->phys_addr <= phys_addr) && (phys_addr <
+			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
+			return md->type;
+	}
+	return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	int i;
+
+	for (i = 0; i < memmap.nr_map; i++) {
+		md = &memmap.map[i];
+		if ((md->phys_addr <= phys_addr) && (phys_addr <
+			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
+			return md->attribute;
+	}
+	return 0;
+}
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
new file mode 100644
index 00000000000..08c0312d9b6
--- /dev/null
+++ b/arch/i386/kernel/efi_stub.S
@@ -0,0 +1,124 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+	/*
+	 * 0. The function can only be called in Linux kernel. So CS has been
+	 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+	 * the values of these registers are the same. And, the corresponding
+	 * GDT entries are identical. So I will do nothing about segment reg
+	 * and GDT, but change GDT base register in prelog and epilog.
+	 */
+
+	/*
+	 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
+	 * But to make it smoothly switch from virtual mode to flat mode.
+	 * The mapping of lower virtual memory has been created in prelog and
+	 * epilog.
+	 */
+	movl	$1f, %edx
+	subl	$__PAGE_OFFSET, %edx
+	jmp	*%edx
+1:
+
+	/*
+	 * 2. Now on the top of stack is the return
+	 * address in the caller of efi_call_phys(), then parameter 1,
+	 * parameter 2, ..., param n. To make things easy, we save the return
+	 * address of efi_call_phys in a global variable.
+	 */
+	popl	%edx
+	movl	%edx, saved_return_addr
+	/* get the function pointer into ECX*/
+	popl	%ecx
+	movl	%ecx, efi_rt_function_ptr
+	movl	$2f, %edx
+	subl	$__PAGE_OFFSET, %edx
+	pushl	%edx
+
+	/*
+	 * 3. Clear PG bit in %CR0.
+	 */
+	movl	%cr0, %edx
+	andl	$0x7fffffff, %edx
+	movl	%edx, %cr0
+	jmp	1f
+1:
+
+	/*
+	 * 4. Adjust stack pointer.
+	 */
+	subl	$__PAGE_OFFSET, %esp
+
+	/*
+	 * 5. Call the physical function.
+	 */
+	jmp	*%ecx
+
+2:
+	/*
+	 * 6. After EFI runtime service returns, control will return to
+	 * following instruction. We'd better readjust stack pointer first.
+	 */
+	addl	$__PAGE_OFFSET, %esp
+
+	/*
+	 * 7. Restore PG bit
+	 */
+	movl	%cr0, %edx
+	orl	$0x80000000, %edx
+	movl	%edx, %cr0
+	jmp	1f
+1:
+	/*
+	 * 8. Now restore the virtual mode from flat mode by
+	 * adding EIP with PAGE_OFFSET.
+	 */
+	movl	$1f, %edx
+	jmp	*%edx
+1:
+
+	/*
+	 * 9. Balance the stack. And because EAX contain the return value,
+	 * we'd better not clobber it.
+	 */
+	leal	efi_rt_function_ptr, %edx
+	movl	(%edx), %ecx
+	pushl	%ecx
+
+	/*
+	 * 10. Push the saved return address onto the stack and return.
+	 */
+	leal	saved_return_addr, %edx
+	movl	(%edx), %ecx
+	pushl	%ecx
+	ret
+.previous
+
+.data
+saved_return_addr:
+	.long 0
+efi_rt_function_ptr:
+	.long 0
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
new file mode 100644
index 00000000000..1e45ff292bc
--- /dev/null
+++ b/arch/i386/kernel/entry.S
@@ -0,0 +1,950 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * 	ptrace needs to have all regs on the stack.
+ *	if the order here is changed, it needs to be
+ *	updated in fork.c:copy_process, signal.c:do_signal,
+ *	ptrace.c and ptrace.h
+ *
+ *	 0(%esp) - %ebx
+ *	 4(%esp) - %ecx
+ *	 8(%esp) - %edx
+ *       C(%esp) - %esi
+ *	10(%esp) - %edi
+ *	14(%esp) - %ebp
+ *	18(%esp) - %eax
+ *	1C(%esp) - %ds
+ *	20(%esp) - %es
+ *	24(%esp) - orig_eax
+ *	28(%esp) - %eip
+ *	2C(%esp) - %cs
+ *	30(%esp) - %eflags
+ *	34(%esp) - %oldesp
+ *	38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/desc.h>
+#include "irq_vectors.h"
+
+#define nr_syscalls ((syscall_table_size)/4)
+
+EBX		= 0x00
+ECX		= 0x04
+EDX		= 0x08
+ESI		= 0x0C
+EDI		= 0x10
+EBP		= 0x14
+EAX		= 0x18
+DS		= 0x1C
+ES		= 0x20
+ORIG_EAX	= 0x24
+EIP		= 0x28
+CS		= 0x2C
+EFLAGS		= 0x30
+OLDESP		= 0x34
+OLDSS		= 0x38
+
+CF_MASK		= 0x00000001
+TF_MASK		= 0x00000100
+IF_MASK		= 0x00000200
+DF_MASK		= 0x00000400 
+NT_MASK		= 0x00004000
+VM_MASK		= 0x00020000
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop		cli
+#else
+#define preempt_stop
+#define resume_kernel		restore_nocheck
+#endif
+
+#define SAVE_ALL \
+	cld; \
+	pushl %es; \
+	pushl %ds; \
+	pushl %eax; \
+	pushl %ebp; \
+	pushl %edi; \
+	pushl %esi; \
+	pushl %edx; \
+	pushl %ecx; \
+	pushl %ebx; \
+	movl $(__USER_DS), %edx; \
+	movl %edx, %ds; \
+	movl %edx, %es;
+
+#define RESTORE_INT_REGS \
+	popl %ebx;	\
+	popl %ecx;	\
+	popl %edx;	\
+	popl %esi;	\
+	popl %edi;	\
+	popl %ebp;	\
+	popl %eax
+
+#define RESTORE_REGS	\
+	RESTORE_INT_REGS; \
+1:	popl %ds;	\
+2:	popl %es;	\
+.section .fixup,"ax";	\
+3:	movl $0,(%esp);	\
+	jmp 1b;		\
+4:	movl $0,(%esp);	\
+	jmp 2b;		\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,3b;	\
+	.long 2b,4b;	\
+.previous
+
+
+ENTRY(ret_from_fork)
+	pushl %eax
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl %eax
+	jmp syscall_exit
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+	# userspace resumption stub bypassing syscall exit tracing
+	ALIGN
+ret_from_exception:
+	preempt_stop
+ret_from_intr:
+	GET_THREAD_INFO(%ebp)
+	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
+	movb CS(%esp), %al
+	testl $(VM_MASK | 3), %eax
+	jz resume_kernel
+ENTRY(resume_userspace)
+ 	cli				# make sure we don't miss an interrupt
+					# setting need_resched or sigpending
+					# between sampling and the iret
+	movl TI_flags(%ebp), %ecx
+	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
+					# int/exception return?
+	jne work_pending
+	jmp restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+	cli
+	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
+	jnz restore_nocheck
+need_resched:
+	movl TI_flags(%ebp), %ecx	# need_resched set ?
+	testb $_TIF_NEED_RESCHED, %cl
+	jz restore_all
+	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
+	jz restore_all
+	call preempt_schedule_irq
+	jmp need_resched
+#endif
+
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
+
+	# sysenter call handler stub
+ENTRY(sysenter_entry)
+	movl TSS_sysenter_esp0(%esp),%esp
+sysenter_past_esp:
+	sti
+	pushl $(__USER_DS)
+	pushl %ebp
+	pushfl
+	pushl $(__USER_CS)
+	pushl $SYSENTER_RETURN
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+	cmpl $__PAGE_OFFSET-3,%ebp
+	jae syscall_fault
+1:	movl (%ebp),%ebp
+.section __ex_table,"a"
+	.align 4
+	.long 1b,syscall_fault
+.previous
+
+	pushl %eax
+	SAVE_ALL
+	GET_THREAD_INFO(%ebp)
+
+	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	jnz syscall_trace_entry
+	cmpl $(nr_syscalls), %eax
+	jae syscall_badsys
+	call *sys_call_table(,%eax,4)
+	movl %eax,EAX(%esp)
+	cli
+	movl TI_flags(%ebp), %ecx
+	testw $_TIF_ALLWORK_MASK, %cx
+	jne syscall_exit_work
+/* if something modifies registers it must also disable sysexit */
+	movl EIP(%esp), %edx
+	movl OLDESP(%esp), %ecx
+	xorl %ebp,%ebp
+	sti
+	sysexit
+
+
+	# system call handler stub
+ENTRY(system_call)
+	pushl %eax			# save orig_eax
+	SAVE_ALL
+	GET_THREAD_INFO(%ebp)
+					# system call tracing in operation
+	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	jnz syscall_trace_entry
+	cmpl $(nr_syscalls), %eax
+	jae syscall_badsys
+syscall_call:
+	call *sys_call_table(,%eax,4)
+	movl %eax,EAX(%esp)		# store the return value
+syscall_exit:
+	cli				# make sure we don't miss an interrupt
+					# setting need_resched or sigpending
+					# between sampling and the iret
+	movl TI_flags(%ebp), %ecx
+	testw $_TIF_ALLWORK_MASK, %cx	# current->work
+	jne syscall_exit_work
+
+restore_all:
+	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
+	movb OLDSS(%esp), %ah
+	movb CS(%esp), %al
+	andl $(VM_MASK | (4 << 8) | 3), %eax
+	cmpl $((4 << 8) | 3), %eax
+	je ldt_ss			# returning to user-space with LDT SS
+restore_nocheck:
+	RESTORE_REGS
+	addl $4, %esp
+1:	iret
+.section .fixup,"ax"
+iret_exc:
+	sti
+	movl $__USER_DS, %edx
+	movl %edx, %ds
+	movl %edx, %es
+	movl $11,%eax
+	call do_exit
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+ldt_ss:
+	larl OLDSS(%esp), %eax
+	jnz restore_nocheck
+	testl $0x00400000, %eax		# returning to 32bit stack?
+	jnz restore_nocheck		# allright, normal return
+	/* If returning to userspace with 16bit stack,
+	 * try to fix the higher word of ESP, as the CPU
+	 * won't restore it.
+	 * This is an "official" bug of all the x86-compatible
+	 * CPUs, which we can try to work around to make
+	 * dosemu and wine happy. */
+	subl $8, %esp		# reserve space for switch16 pointer
+	cli
+	movl %esp, %eax
+	/* Set up the 16bit stack frame with switch32 pointer on top,
+	 * and a switch16 pointer on top of the current frame. */
+	call setup_x86_bogus_stack
+	RESTORE_REGS
+	lss 20+4(%esp), %esp	# switch to 16bit stack
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+	# perform work that needs to be done immediately before resumption
+	ALIGN
+work_pending:
+	testb $_TIF_NEED_RESCHED, %cl
+	jz work_notifysig
+work_resched:
+	call schedule
+	cli				# make sure we don't miss an interrupt
+					# setting need_resched or sigpending
+					# between sampling and the iret
+	movl TI_flags(%ebp), %ecx
+	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
+					# than syscall tracing?
+	jz restore_all
+	testb $_TIF_NEED_RESCHED, %cl
+	jnz work_resched
+
+work_notifysig:				# deal with pending signals and
+					# notify-resume requests
+	testl $VM_MASK, EFLAGS(%esp)
+	movl %esp, %eax
+	jne work_notifysig_v86		# returning to kernel-space or
+					# vm86-space
+	xorl %edx, %edx
+	call do_notify_resume
+	jmp restore_all
+
+	ALIGN
+work_notifysig_v86:
+	pushl %ecx			# save ti_flags for do_notify_resume
+	call save_v86_state		# %eax contains pt_regs pointer
+	popl %ecx
+	movl %eax, %esp
+	xorl %edx, %edx
+	call do_notify_resume
+	jmp restore_all
+
+	# perform syscall exit tracing
+	ALIGN
+syscall_trace_entry:
+	movl $-ENOSYS,EAX(%esp)
+	movl %esp, %eax
+	xorl %edx,%edx
+	call do_syscall_trace
+	movl ORIG_EAX(%esp), %eax
+	cmpl $(nr_syscalls), %eax
+	jnae syscall_call
+	jmp syscall_exit
+
+	# perform syscall exit tracing
+	ALIGN
+syscall_exit_work:
+	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+	jz work_pending
+	sti				# could let do_syscall_trace() call
+					# schedule() instead
+	movl %esp, %eax
+	movl $1, %edx
+	call do_syscall_trace
+	jmp resume_userspace
+
+	ALIGN
+syscall_fault:
+	pushl %eax			# save orig_eax
+	SAVE_ALL
+	GET_THREAD_INFO(%ebp)
+	movl $-EFAULT,EAX(%esp)
+	jmp resume_userspace
+
+	ALIGN
+syscall_badsys:
+	movl $-ENOSYS,EAX(%esp)
+	jmp resume_userspace
+
+#define FIXUP_ESPFIX_STACK \
+	movl %esp, %eax; \
+	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
+	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+	/* copy data from 16bit stack to 32bit stack */ \
+	call fixup_x86_bogus_stack; \
+	/* put ESP to the proper location */ \
+	movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+	pushl %eax; \
+	movl %ss, %eax; \
+	/* see if on 16bit stack */ \
+	cmpw $__ESPFIX_SS, %ax; \
+	jne 28f; \
+	movl $__KERNEL_DS, %edx; \
+	movl %edx, %ds; \
+	movl %edx, %es; \
+	/* switch to 32bit stack */ \
+	FIXUP_ESPFIX_STACK \
+28:	popl %eax;
+
+/*
+ * Build the entry stubs and pointer table with
+ * some assembler magic.
+ */
+.data
+ENTRY(interrupt)
+.text
+
+vector=0
+ENTRY(irq_entries_start)
+.rept NR_IRQS
+	ALIGN
+1:	pushl $vector-256
+	jmp common_interrupt
+.data
+	.long 1b
+.text
+vector=vector+1
+.endr
+
+	ALIGN
+common_interrupt:
+	SAVE_ALL
+	movl %esp,%eax
+	call do_IRQ
+	jmp ret_from_intr
+
+#define BUILD_INTERRUPT(name, nr)	\
+ENTRY(name)				\
+	pushl $nr-256;			\
+	SAVE_ALL			\
+	movl %esp,%eax;			\
+	call smp_/**/name;		\
+	jmp ret_from_intr;
+
+/* The include is where all of the SMP etc. interrupts come from */
+#include "entry_arch.h"
+
+ENTRY(divide_error)
+	pushl $0			# no error code
+	pushl $do_divide_error
+	ALIGN
+error_code:
+	pushl %ds
+	pushl %eax
+	xorl %eax, %eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	decl %eax			# eax = -1
+	pushl %ecx
+	pushl %ebx
+	cld
+	pushl %es
+	UNWIND_ESPFIX_STACK
+	popl %ecx
+	movl ES(%esp), %edi		# get the function address
+	movl ORIG_EAX(%esp), %edx	# get the error code
+	movl %eax, ORIG_EAX(%esp)
+	movl %ecx, ES(%esp)
+	movl $(__USER_DS), %ecx
+	movl %ecx, %ds
+	movl %ecx, %es
+	movl %esp,%eax			# pt_regs pointer
+	call *%edi
+	jmp ret_from_exception
+
+ENTRY(coprocessor_error)
+	pushl $0
+	pushl $do_coprocessor_error
+	jmp error_code
+
+ENTRY(simd_coprocessor_error)
+	pushl $0
+	pushl $do_simd_coprocessor_error
+	jmp error_code
+
+ENTRY(device_not_available)
+	pushl $-1			# mark this as an int
+	SAVE_ALL
+	movl %cr0, %eax
+	testl $0x4, %eax		# EM (math emulation bit)
+	jne device_not_available_emulate
+	preempt_stop
+	call math_state_restore
+	jmp ret_from_exception
+device_not_available_emulate:
+	pushl $0			# temporary storage for ORIG_EIP
+	call math_emulate
+	addl $4, %esp
+	jmp ret_from_exception
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label)		\
+	cmpw $__KERNEL_CS,4(%esp);		\
+	jne ok;					\
+label:						\
+	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
+	pushfl;					\
+	pushl $__KERNEL_CS;			\
+	pushl $sysenter_past_esp
+
+ENTRY(debug)
+	cmpl $sysenter_entry,(%esp)
+	jne debug_stack_correct
+	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+	pushl $-1			# mark this as an int
+	SAVE_ALL
+	xorl %edx,%edx			# error code 0
+	movl %esp,%eax			# pt_regs pointer
+	call do_debug
+	testl %eax,%eax
+	jnz restore_all
+	jmp ret_from_exception
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got  an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+	pushl %eax
+	movl %ss, %eax
+	cmpw $__ESPFIX_SS, %ax
+	popl %eax
+	je nmi_16bit_stack
+	cmpl $sysenter_entry,(%esp)
+	je nmi_stack_fixup
+	pushl %eax
+	movl %esp,%eax
+	/* Do not access memory above the end of our stack page,
+	 * it might not exist.
+	 */
+	andl $(THREAD_SIZE-1),%eax
+	cmpl $(THREAD_SIZE-20),%eax
+	popl %eax
+	jae nmi_stack_correct
+	cmpl $sysenter_entry,12(%esp)
+	je nmi_debug_stack_check
+nmi_stack_correct:
+	pushl %eax
+	SAVE_ALL
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_nmi
+	jmp restore_all
+
+nmi_stack_fixup:
+	FIX_STACK(12,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+nmi_debug_stack_check:
+	cmpw $__KERNEL_CS,16(%esp)
+	jne nmi_stack_correct
+	cmpl $debug - 1,(%esp)
+	jle nmi_stack_correct
+	cmpl $debug_esp_fix_insn,(%esp)
+	jle nmi_debug_stack_fixup
+nmi_debug_stack_fixup:
+	FIX_STACK(24,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+
+nmi_16bit_stack:
+	/* create the pointer to lss back */
+	pushl %ss
+	pushl %esp
+	movzwl %sp, %esp
+	addw $4, (%esp)
+	/* copy the iret frame of 12 bytes */
+	.rept 3
+	pushl 16(%esp)
+	.endr
+	pushl %eax
+	SAVE_ALL
+	FIXUP_ESPFIX_STACK		# %eax == %esp
+	xorl %edx,%edx			# zero error code
+	call do_nmi
+	RESTORE_REGS
+	lss 12+4(%esp), %esp		# back to 16bit stack
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+ENTRY(int3)
+	pushl $-1			# mark this as an int
+	SAVE_ALL
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_int3
+	testl %eax,%eax
+	jnz restore_all
+	jmp ret_from_exception
+
+ENTRY(overflow)
+	pushl $0
+	pushl $do_overflow
+	jmp error_code
+
+ENTRY(bounds)
+	pushl $0
+	pushl $do_bounds
+	jmp error_code
+
+ENTRY(invalid_op)
+	pushl $0
+	pushl $do_invalid_op
+	jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+	pushl $0
+	pushl $do_coprocessor_segment_overrun
+	jmp error_code
+
+ENTRY(invalid_TSS)
+	pushl $do_invalid_TSS
+	jmp error_code
+
+ENTRY(segment_not_present)
+	pushl $do_segment_not_present
+	jmp error_code
+
+ENTRY(stack_segment)
+	pushl $do_stack_segment
+	jmp error_code
+
+ENTRY(general_protection)
+	pushl $do_general_protection
+	jmp error_code
+
+ENTRY(alignment_check)
+	pushl $do_alignment_check
+	jmp error_code
+
+ENTRY(page_fault)
+	pushl $do_page_fault
+	jmp error_code
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+	pushl $0
+	pushl machine_check_vector
+	jmp error_code
+#endif
+
+ENTRY(spurious_interrupt_bug)
+	pushl $0
+	pushl $do_spurious_interrupt_bug
+	jmp error_code
+
+.data
+ENTRY(sys_call_table)
+	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
+	.long sys_exit
+	.long sys_fork
+	.long sys_read
+	.long sys_write
+	.long sys_open		/* 5 */
+	.long sys_close
+	.long sys_waitpid
+	.long sys_creat
+	.long sys_link
+	.long sys_unlink	/* 10 */
+	.long sys_execve
+	.long sys_chdir
+	.long sys_time
+	.long sys_mknod
+	.long sys_chmod		/* 15 */
+	.long sys_lchown16
+	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_stat
+	.long sys_lseek
+	.long sys_getpid	/* 20 */
+	.long sys_mount
+	.long sys_oldumount
+	.long sys_setuid16
+	.long sys_getuid16
+	.long sys_stime		/* 25 */
+	.long sys_ptrace
+	.long sys_alarm
+	.long sys_fstat
+	.long sys_pause
+	.long sys_utime		/* 30 */
+	.long sys_ni_syscall	/* old stty syscall holder */
+	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_access
+	.long sys_nice
+	.long sys_ni_syscall	/* 35 - old ftime syscall holder */
+	.long sys_sync
+	.long sys_kill
+	.long sys_rename
+	.long sys_mkdir
+	.long sys_rmdir		/* 40 */
+	.long sys_dup
+	.long sys_pipe
+	.long sys_times
+	.long sys_ni_syscall	/* old prof syscall holder */
+	.long sys_brk		/* 45 */
+	.long sys_setgid16
+	.long sys_getgid16
+	.long sys_signal
+	.long sys_geteuid16
+	.long sys_getegid16	/* 50 */
+	.long sys_acct
+	.long sys_umount	/* recycled never used phys() */
+	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_ioctl
+	.long sys_fcntl		/* 55 */
+	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_setpgid
+	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_olduname
+	.long sys_umask		/* 60 */
+	.long sys_chroot
+	.long sys_ustat
+	.long sys_dup2
+	.long sys_getppid
+	.long sys_getpgrp	/* 65 */
+	.long sys_setsid
+	.long sys_sigaction
+	.long sys_sgetmask
+	.long sys_ssetmask
+	.long sys_setreuid16	/* 70 */
+	.long sys_setregid16
+	.long sys_sigsuspend
+	.long sys_sigpending
+	.long sys_sethostname
+	.long sys_setrlimit	/* 75 */
+	.long sys_old_getrlimit
+	.long sys_getrusage
+	.long sys_gettimeofday
+	.long sys_settimeofday
+	.long sys_getgroups16	/* 80 */
+	.long sys_setgroups16
+	.long old_select
+	.long sys_symlink
+	.long sys_lstat
+	.long sys_readlink	/* 85 */
+	.long sys_uselib
+	.long sys_swapon
+	.long sys_reboot
+	.long old_readdir
+	.long old_mmap		/* 90 */
+	.long sys_munmap
+	.long sys_truncate
+	.long sys_ftruncate
+	.long sys_fchmod
+	.long sys_fchown16	/* 95 */
+	.long sys_getpriority
+	.long sys_setpriority
+	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_statfs
+	.long sys_fstatfs	/* 100 */
+	.long sys_ioperm
+	.long sys_socketcall
+	.long sys_syslog
+	.long sys_setitimer
+	.long sys_getitimer	/* 105 */
+	.long sys_newstat
+	.long sys_newlstat
+	.long sys_newfstat
+	.long sys_uname
+	.long sys_iopl		/* 110 */
+	.long sys_vhangup
+	.long sys_ni_syscall	/* old "idle" system call */
+	.long sys_vm86old
+	.long sys_wait4
+	.long sys_swapoff	/* 115 */
+	.long sys_sysinfo
+	.long sys_ipc
+	.long sys_fsync
+	.long sys_sigreturn
+	.long sys_clone		/* 120 */
+	.long sys_setdomainname
+	.long sys_newuname
+	.long sys_modify_ldt
+	.long sys_adjtimex
+	.long sys_mprotect	/* 125 */
+	.long sys_sigprocmask
+	.long sys_ni_syscall	/* old "create_module" */ 
+	.long sys_init_module
+	.long sys_delete_module
+	.long sys_ni_syscall	/* 130:	old "get_kernel_syms" */
+	.long sys_quotactl
+	.long sys_getpgid
+	.long sys_fchdir
+	.long sys_bdflush
+	.long sys_sysfs		/* 135 */
+	.long sys_personality
+	.long sys_ni_syscall	/* reserved for afs_syscall */
+	.long sys_setfsuid16
+	.long sys_setfsgid16
+	.long sys_llseek	/* 140 */
+	.long sys_getdents
+	.long sys_select
+	.long sys_flock
+	.long sys_msync
+	.long sys_readv		/* 145 */
+	.long sys_writev
+	.long sys_getsid
+	.long sys_fdatasync
+	.long sys_sysctl
+	.long sys_mlock		/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
+	.long sys_sched_setparam
+	.long sys_sched_getparam   /* 155 */
+	.long sys_sched_setscheduler
+	.long sys_sched_getscheduler
+	.long sys_sched_yield
+	.long sys_sched_get_priority_max
+	.long sys_sched_get_priority_min  /* 160 */
+	.long sys_sched_rr_get_interval
+	.long sys_nanosleep
+	.long sys_mremap
+	.long sys_setresuid16
+	.long sys_getresuid16	/* 165 */
+	.long sys_vm86
+	.long sys_ni_syscall	/* Old sys_query_module */
+	.long sys_poll
+	.long sys_nfsservctl
+	.long sys_setresgid16	/* 170 */
+	.long sys_getresgid16
+	.long sys_prctl
+	.long sys_rt_sigreturn
+	.long sys_rt_sigaction
+	.long sys_rt_sigprocmask	/* 175 */
+	.long sys_rt_sigpending
+	.long sys_rt_sigtimedwait
+	.long sys_rt_sigqueueinfo
+	.long sys_rt_sigsuspend
+	.long sys_pread64	/* 180 */
+	.long sys_pwrite64
+	.long sys_chown16
+	.long sys_getcwd
+	.long sys_capget
+	.long sys_capset	/* 185 */
+	.long sys_sigaltstack
+	.long sys_sendfile
+	.long sys_ni_syscall	/* reserved for streams1 */
+	.long sys_ni_syscall	/* reserved for streams2 */
+	.long sys_vfork		/* 190 */
+	.long sys_getrlimit
+	.long sys_mmap2
+	.long sys_truncate64
+	.long sys_ftruncate64
+	.long sys_stat64	/* 195 */
+	.long sys_lstat64
+	.long sys_fstat64
+	.long sys_lchown
+	.long sys_getuid
+	.long sys_getgid	/* 200 */
+	.long sys_geteuid
+	.long sys_getegid
+	.long sys_setreuid
+	.long sys_setregid
+	.long sys_getgroups	/* 205 */
+	.long sys_setgroups
+	.long sys_fchown
+	.long sys_setresuid
+	.long sys_getresuid
+	.long sys_setresgid	/* 210 */
+	.long sys_getresgid
+	.long sys_chown
+	.long sys_setuid
+	.long sys_setgid
+	.long sys_setfsuid	/* 215 */
+	.long sys_setfsgid
+	.long sys_pivot_root
+	.long sys_mincore
+	.long sys_madvise
+	.long sys_getdents64	/* 220 */
+	.long sys_fcntl64
+	.long sys_ni_syscall	/* reserved for TUX */
+	.long sys_ni_syscall
+	.long sys_gettid
+	.long sys_readahead	/* 225 */
+	.long sys_setxattr
+	.long sys_lsetxattr
+	.long sys_fsetxattr
+	.long sys_getxattr
+	.long sys_lgetxattr	/* 230 */
+	.long sys_fgetxattr
+	.long sys_listxattr
+	.long sys_llistxattr
+	.long sys_flistxattr
+	.long sys_removexattr	/* 235 */
+	.long sys_lremovexattr
+	.long sys_fremovexattr
+	.long sys_tkill
+	.long sys_sendfile64
+	.long sys_futex		/* 240 */
+	.long sys_sched_setaffinity
+	.long sys_sched_getaffinity
+	.long sys_set_thread_area
+	.long sys_get_thread_area
+	.long sys_io_setup	/* 245 */
+	.long sys_io_destroy
+	.long sys_io_getevents
+	.long sys_io_submit
+	.long sys_io_cancel
+	.long sys_fadvise64	/* 250 */
+	.long sys_ni_syscall
+	.long sys_exit_group
+	.long sys_lookup_dcookie
+	.long sys_epoll_create
+	.long sys_epoll_ctl	/* 255 */
+	.long sys_epoll_wait
+ 	.long sys_remap_file_pages
+ 	.long sys_set_tid_address
+ 	.long sys_timer_create
+ 	.long sys_timer_settime		/* 260 */
+ 	.long sys_timer_gettime
+ 	.long sys_timer_getoverrun
+ 	.long sys_timer_delete
+ 	.long sys_clock_settime
+ 	.long sys_clock_gettime		/* 265 */
+ 	.long sys_clock_getres
+ 	.long sys_clock_nanosleep
+	.long sys_statfs64
+	.long sys_fstatfs64	
+	.long sys_tgkill	/* 270 */
+	.long sys_utimes
+ 	.long sys_fadvise64_64
+	.long sys_ni_syscall	/* sys_vserver */
+	.long sys_mbind
+	.long sys_get_mempolicy
+	.long sys_set_mempolicy
+	.long sys_mq_open
+	.long sys_mq_unlink
+	.long sys_mq_timedsend
+	.long sys_mq_timedreceive	/* 280 */
+	.long sys_mq_notify
+	.long sys_mq_getsetattr
+	.long sys_ni_syscall		/* reserved for kexec */
+	.long sys_waitid
+	.long sys_ni_syscall		/* 285 */ /* available */
+	.long sys_add_key
+	.long sys_request_key
+	.long sys_keyctl
+
+syscall_table_size=(.-sys_call_table)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
new file mode 100644
index 00000000000..d273fd74619
--- /dev/null
+++ b/arch/i386/kernel/head.S
@@ -0,0 +1,521 @@
+/*
+ *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Enhanced CPU detection and feature setting code by Mike Jagdis
+ *  and Martin Mares, November 1997.
+ */
+
+.text
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm_offsets.h>
+#include <asm/setup.h>
+
+/*
+ * References to members of the new_cpu_data structure.
+ */
+
+#define X86		new_cpu_data+CPUINFO_x86
+#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
+#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
+#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
+#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
+#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
+#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
+#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
+
+/*
+ * This is how much memory *in addition to the memory covered up to
+ * and including _end* we need mapped initially.  We need one bit for
+ * each possible page, but only in low memory, which means
+ * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ */
+#define INIT_MAP_BEYOND_END	(128*1024)
+
+
+/*
+ * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
+ * %esi points to the real-mode code as a 32-bit pointer.
+ * CS and DS must be 4 GB flat segments, but we don't depend on
+ * any particular GDT layout, because we load our own as soon as we
+ * can.
+ */
+ENTRY(startup_32)
+
+/*
+ * Set segments to known values.
+ */
+	cld
+	lgdt boot_gdt_descr - __PAGE_OFFSET
+	movl $(__BOOT_DS),%eax
+	movl %eax,%ds
+	movl %eax,%es
+	movl %eax,%fs
+	movl %eax,%gs
+
+/*
+ * Clear BSS first so that there are no surprises...
+ * No need to cld as DF is already clear from cld above...
+ */
+	xorl %eax,%eax
+	movl $__bss_start - __PAGE_OFFSET,%edi
+	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	subl %edi,%ecx
+	shrl $2,%ecx
+	rep ; stosl
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond _end.  The variable
+ * init_pg_tables_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ *
+ * Warning: don't use %esi or the stack in this code.  However, %esp
+ * can be used as a GPR if you really need it...
+ */
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $(pg0 - __PAGE_OFFSET), %edi
+	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
+	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
+10:
+	leal 0x007(%edi),%ecx			/* Create PDE entry */
+	movl %ecx,(%edx)			/* Store identity PDE entry */
+	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
+	addl $4,%edx
+	movl $1024, %ecx
+11:
+	stosl
+	addl $0x1000,%eax
+	loop 11b
+	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
+	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
+	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
+
+#ifdef CONFIG_SMP
+	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
+	jmp 3f
+
+/*
+ * Non-boot CPU entry point; entered from trampoline.S
+ * We can't lgdt here, because lgdt itself uses a data segment, but
+ * we know the trampoline has already loaded the boot_gdt_table GDT
+ * for us.
+ */
+ENTRY(startup_32_smp)
+	cld
+	movl $(__BOOT_DS),%eax
+	movl %eax,%ds
+	movl %eax,%es
+	movl %eax,%fs
+	movl %eax,%gs
+
+/*
+ *	New page tables may be in 4Mbyte page mode and may
+ *	be using the global pages. 
+ *
+ *	NOTE! If we are on a 486 we may have no cr4 at all!
+ *	So we do not try to touch it unless we really have
+ *	some bits in it to set.  This won't work if the BSP
+ *	implements cr4 but this AP does not -- very unlikely
+ *	but be warned!  The same applies to the pse feature
+ *	if not equally supported. --macro
+ *
+ *	NOTE! We have to correct for the fact that we're
+ *	not yet offset PAGE_OFFSET..
+ */
+#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+	movl cr4_bits,%edx
+	andl %edx,%edx
+	jz 6f
+	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
+	orl %edx,%eax
+	movl %eax,%cr4
+
+	btl $5, %eax		# check if PAE is enabled
+	jnc 6f
+
+	/* Check if extended functions are implemented */
+	movl $0x80000000, %eax
+	cpuid
+	cmpl $0x80000000, %eax
+	jbe 6f
+	mov $0x80000001, %eax
+	cpuid
+	/* Execute Disable bit supported? */
+	btl $20, %edx
+	jnc 6f
+
+	/* Setup EFER (Extended Feature Enable Register) */
+	movl $0xc0000080, %ecx
+	rdmsr
+
+	btsl $11, %eax
+	/* Make changes effective */
+	wrmsr
+
+6:
+	/* This is a secondary processor (AP) */
+	xorl %ebx,%ebx
+	incl %ebx
+
+3:
+#endif /* CONFIG_SMP */
+
+/*
+ * Enable paging
+ */
+	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl %eax,%cr3		/* set the page table pointer.. */
+	movl %cr0,%eax
+	orl $0x80000000,%eax
+	movl %eax,%cr0		/* ..and set paging (PG) bit */
+	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
+1:
+	/* Set up the stack pointer */
+	lss stack_start,%esp
+
+/*
+ * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+	pushl $0
+	popfl
+
+#ifdef CONFIG_SMP
+	andl %ebx,%ebx
+	jz  1f				/* Initial CPU cleans BSS */
+	jmp checkCPUtype
+1:
+#endif /* CONFIG_SMP */
+
+/*
+ * start system 32-bit setup. We need to re-do some of the things done
+ * in 16-bit mode for the "real" operations.
+ */
+	call setup_idt
+
+/*
+ * Copy bootup parameters out of the way.
+ * Note: %esi still has the pointer to the real-mode data.
+ */
+	movl $boot_params,%edi
+	movl $(PARAM_SIZE/4),%ecx
+	cld
+	rep
+	movsl
+	movl boot_params+NEW_CL_POINTER,%esi
+	andl %esi,%esi
+	jnz 2f			# New command line protocol
+	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
+	jne 1f
+	movzwl OLD_CL_OFFSET,%esi
+	addl $(OLD_CL_BASE_ADDR),%esi
+2:
+	movl $saved_command_line,%edi
+	movl $(COMMAND_LINE_SIZE/4),%ecx
+	rep
+	movsl
+1:
+checkCPUtype:
+
+	movl $-1,X86_CPUID		#  -1 for no CPUID initially
+
+/* check if it is 486 or 386. */
+/*
+ * XXX - this does a lot of unnecessary setup.  Alignment checks don't
+ * apply at our cpl of 0 and the stack ought to be aligned already, and
+ * we don't need to preserve eflags.
+ */
+
+	movb $3,X86		# at least 386
+	pushfl			# push EFLAGS
+	popl %eax		# get EFLAGS
+	movl %eax,%ecx		# save original EFLAGS
+	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
+	pushl %eax		# copy to EFLAGS
+	popfl			# set EFLAGS
+	pushfl			# get new EFLAGS
+	popl %eax		# put it in eax
+	xorl %ecx,%eax		# change in flags
+	pushl %ecx		# restore original EFLAGS
+	popfl
+	testl $0x40000,%eax	# check if AC bit changed
+	je is386
+
+	movb $4,X86		# at least 486
+	testl $0x200000,%eax	# check if ID bit changed
+	je is486
+
+	/* get vendor info */
+	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
+	cpuid
+	movl %eax,X86_CPUID		# save CPUID level
+	movl %ebx,X86_VENDOR_ID		# lo 4 chars
+	movl %edx,X86_VENDOR_ID+4	# next 4 chars
+	movl %ecx,X86_VENDOR_ID+8	# last 4 chars
+
+	orl %eax,%eax			# do we have processor info as well?
+	je is486
+
+	movl $1,%eax		# Use the CPUID instruction to get CPU type
+	cpuid
+	movb %al,%cl		# save reg for future use
+	andb $0x0f,%ah		# mask processor family
+	movb %ah,X86
+	andb $0xf0,%al		# mask model
+	shrb $4,%al
+	movb %al,X86_MODEL
+	andb $0x0f,%cl		# mask mask revision
+	movb %cl,X86_MASK
+	movl %edx,X86_CAPABILITY
+
+is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
+	jmp 2f
+
+is386:	movl $2,%ecx		# set MP
+2:	movl %cr0,%eax
+	andl $0x80000011,%eax	# Save PG,PE,ET
+	orl %ecx,%eax
+	movl %eax,%cr0
+
+	call check_x87
+	incb ready
+	lgdt cpu_gdt_descr
+	lidt idt_descr
+	ljmp $(__KERNEL_CS),$1f
+1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
+	movl %eax,%ss			# after changing gdt.
+
+	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
+	movl %eax,%ds
+	movl %eax,%es
+
+	xorl %eax,%eax			# Clear FS/GS and LDT
+	movl %eax,%fs
+	movl %eax,%gs
+	lldt %ax
+	cld			# gcc2 wants the direction flag cleared at all times
+#ifdef CONFIG_SMP
+	movb ready, %cl	
+	cmpb $1,%cl
+	je 1f			# the first CPU calls start_kernel
+				# all other CPUs call initialize_secondary
+	call initialize_secondary
+	jmp L6
+1:
+#endif /* CONFIG_SMP */
+	call start_kernel
+L6:
+	jmp L6			# main should never return here, but
+				# just in case, we know what happens.
+
+/*
+ * We depend on ET to be correct. This checks for 287/387.
+ */
+check_x87:
+	movb $0,X86_HARD_MATH
+	clts
+	fninit
+	fstsw %ax
+	cmpb $0,%al
+	je 1f
+	movl %cr0,%eax		/* no coprocessor: have to set bits */
+	xorl $4,%eax		/* set EM */
+	movl %eax,%cr0
+	ret
+	ALIGN
+1:	movb $1,X86_HARD_MATH
+	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
+	ret
+
+/*
+ *  setup_idt
+ *
+ *  sets up a idt with 256 entries pointing to
+ *  ignore_int, interrupt gates. It doesn't actually load
+ *  idt - that can be done only after paging has been enabled
+ *  and the kernel moved to PAGE_OFFSET. Interrupts
+ *  are enabled elsewhere, when we can be relatively
+ *  sure everything is ok.
+ *
+ *  Warning: %esi is live across this function.
+ */
+setup_idt:
+	lea ignore_int,%edx
+	movl $(__KERNEL_CS << 16),%eax
+	movw %dx,%ax		/* selector = 0x0010 = cs */
+	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
+
+	lea idt_table,%edi
+	mov $256,%ecx
+rp_sidt:
+	movl %eax,(%edi)
+	movl %edx,4(%edi)
+	addl $8,%edi
+	dec %ecx
+	jne rp_sidt
+	ret
+
+/* This is the default interrupt "handler" :-) */
+	ALIGN
+ignore_int:
+	cld
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	pushl %es
+	pushl %ds
+	movl $(__KERNEL_DS),%eax
+	movl %eax,%ds
+	movl %eax,%es
+	pushl 16(%esp)
+	pushl 24(%esp)
+	pushl 32(%esp)
+	pushl 40(%esp)
+	pushl $int_msg
+	call printk
+	addl $(5*4),%esp
+	popl %ds
+	popl %es
+	popl %edx
+	popl %ecx
+	popl %eax
+	iret
+
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
+
+/*
+ * BSS section
+ */
+.section ".bss.page_aligned","w"
+ENTRY(swapper_pg_dir)
+	.fill 1024,4,0
+ENTRY(empty_zero_page)
+	.fill 4096,1,0
+
+/*
+ * This starts the data section.
+ */
+.data
+
+ENTRY(stack_start)
+	.long init_thread_union+THREAD_SIZE
+	.long __BOOT_DS
+
+ready:	.byte 0
+
+int_msg:
+	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
+
+/*
+ * The IDT and GDT 'descriptors' are a strange 48-bit object
+ * only used by the lidt and lgdt instructions. They are not
+ * like usual segment descriptors - they consist of a 16-bit
+ * segment size, and 32-bit linear address value:
+ */
+
+.globl boot_gdt_descr
+.globl idt_descr
+.globl cpu_gdt_descr
+
+	ALIGN
+# early boot GDT descriptor (must use 1:1 address mapping)
+	.word 0				# 32 bit align gdt_desc.address
+boot_gdt_descr:
+	.word __BOOT_DS+7
+	.long boot_gdt_table - __PAGE_OFFSET
+
+	.word 0				# 32-bit align idt_desc.address
+idt_descr:
+	.word IDT_ENTRIES*8-1		# idt contains 256 entries
+	.long idt_table
+
+# boot GDT descriptor (later on used by CPU#0):
+	.word 0				# 32 bit align gdt_desc.address
+cpu_gdt_descr:
+	.word GDT_ENTRIES*8-1
+	.long cpu_gdt_table
+
+	.fill NR_CPUS-1,8,0		# space for the other GDT descriptors
+
+/*
+ * The boot_gdt_table must mirror the equivalent in setup.S and is
+ * used only for booting.
+ */
+	.align L1_CACHE_BYTES
+ENTRY(boot_gdt_table)
+	.fill GDT_ENTRY_BOOT_CS,8,0
+	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
+
+/*
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
+ */
+	.align PAGE_SIZE_asm
+ENTRY(cpu_gdt_table)
+	.quad 0x0000000000000000	/* NULL descriptor */
+	.quad 0x0000000000000000	/* 0x0b reserved */
+	.quad 0x0000000000000000	/* 0x13 reserved */
+	.quad 0x0000000000000000	/* 0x1b reserved */
+	.quad 0x0000000000000000	/* 0x20 unused */
+	.quad 0x0000000000000000	/* 0x28 unused */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
+	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x4b reserved */
+	.quad 0x0000000000000000	/* 0x53 reserved */
+	.quad 0x0000000000000000	/* 0x5b reserved */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x00cffa000000ffff	/* 0x73 user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x7b user 4GB data at 0x00000000 */
+
+	.quad 0x0000000000000000	/* 0x80 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x88 LDT descriptor */
+
+	/* Segments used for calling PnP BIOS */
+	.quad 0x00c09a0000000000	/* 0x90 32-bit code */
+	.quad 0x00809a0000000000	/* 0x98 16-bit code */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
+	.quad 0x0080920000000000	/* 0xa8 16-bit data */
+	.quad 0x0080920000000000	/* 0xb0 16-bit data */
+	/*
+	 * The APM segments have byte granularity and their bases
+	 * and limits are set at run time.
+	 */
+	.quad 0x00409a0000000000	/* 0xb8 APM CS    code */
+	.quad 0x00009a0000000000	/* 0xc0 APM CS 16 code (16 bit) */
+	.quad 0x0040920000000000	/* 0xc8 APM DS    data */
+
+	.quad 0x0000920000000000	/* 0xd0 - ESPFIX 16-bit SS */
+	.quad 0x0000000000000000	/* 0xd8 - unused */
+	.quad 0x0000000000000000	/* 0xe0 - unused */
+	.quad 0x0000000000000000	/* 0xe8 - unused */
+	.quad 0x0000000000000000	/* 0xf0 - unused */
+	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
+
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
new file mode 100644
index 00000000000..14ec354bec9
--- /dev/null
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -0,0 +1,195 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/mca.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/nmi.h>
+#include <asm/ist.h>
+#include <asm/kdebug.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern spinlock_t rtc_lock;
+
+/* This is definitely a GPL-only symbol */
+EXPORT_SYMBOL_GPL(cpu_gdt_table);
+
+#if defined(CONFIG_APM_MODULE)
+extern void machine_real_restart(unsigned char *, int);
+EXPORT_SYMBOL(machine_real_restart);
+extern void default_idle(void);
+EXPORT_SYMBOL(default_idle);
+#endif
+
+#ifdef CONFIG_SMP
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long cpu_khz;
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+#ifdef CONFIG_DISCONTIGMEM
+EXPORT_SYMBOL(node_data);
+EXPORT_SYMBOL(physnode_map);
+#endif
+#ifdef CONFIG_X86_NUMAQ
+EXPORT_SYMBOL(xquad_portio);
+#endif
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+EXPORT_SYMBOL(get_cmos_time);
+EXPORT_SYMBOL(cpu_khz);
+EXPORT_SYMBOL(apm_info);
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_generic);
+/* Delay loops */
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__copy_from_user_ll);
+EXPORT_SYMBOL(__copy_to_user_ll);
+EXPORT_SYMBOL(strnlen_user);
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+EXPORT_SYMBOL(dma_free_coherent);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+#ifdef CONFIG_PCI_BIOS
+EXPORT_SYMBOL(pcibios_set_irq_routing);
+EXPORT_SYMBOL(pcibios_get_irq_routing_table);
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+EXPORT_SYMBOL(_mmx_memcpy);
+EXPORT_SYMBOL(mmx_clear_page);
+EXPORT_SYMBOL(mmx_copy_page);
+#endif
+
+#ifdef CONFIG_X86_HT
+EXPORT_SYMBOL(smp_num_siblings);
+EXPORT_SYMBOL(cpu_sibling_map);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(cpu_callout_map);
+EXPORT_SYMBOL(__write_lock_failed);
+EXPORT_SYMBOL(__read_lock_failed);
+
+/* Global SMP stuff */
+EXPORT_SYMBOL(smp_call_function);
+
+/* TLB flushing */
+EXPORT_SYMBOL(flush_tlb_page);
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+#endif
+
+#ifdef CONFIG_MCA
+EXPORT_SYMBOL(machine_id);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+EXPORT_SYMBOL_GPL(set_nmi_callback);
+EXPORT_SYMBOL_GPL(unset_nmi_callback);
+
+#undef memcmp
+extern int memcmp(const void *,const void *,__kernel_size_t);
+EXPORT_SYMBOL(memcmp);
+
+EXPORT_SYMBOL(register_die_notifier);
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif
+
+EXPORT_SYMBOL(__PAGE_KERNEL);
+
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap);
+EXPORT_SYMBOL(kunmap);
+EXPORT_SYMBOL(kmap_atomic);
+EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_to_page);
+#endif
+
+#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
+EXPORT_SYMBOL(ist_info);
+#endif
+
+EXPORT_SYMBOL(csum_partial);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
new file mode 100644
index 00000000000..c55e037f08f
--- /dev/null
+++ b/arch/i386/kernel/i387.c
@@ -0,0 +1,555 @@
+/*
+ *  linux/arch/i386/kernel/i387.c
+ *
+ *  Copyright (C) 1994 Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *  General FPU state handling cleanups
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/math_emu.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_MATH_EMULATION
+#define HAVE_HWFP (boot_cpu_data.hard_math)
+#else
+#define HAVE_HWFP 1
+#endif
+
+static unsigned long mxcsr_feature_mask = 0xffffffff;
+
+void mxcsr_feature_mask_init(void)
+{
+	unsigned long mask = 0;
+	clts();
+	if (cpu_has_fxsr) {
+		memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
+		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); 
+		mask = current->thread.i387.fxsave.mxcsr_mask;
+		if (mask == 0) mask = 0x0000ffbf;
+	} 
+	mxcsr_feature_mask &= mask;
+	stts();
+}
+
+/*
+ * The _current_ task is using the FPU for the first time
+ * so initialize it and set the mxcsr to its default
+ * value at reset if we support XMM instructions and then
+ * remeber the current task has used the FPU.
+ */
+void init_fpu(struct task_struct *tsk)
+{
+	if (cpu_has_fxsr) {
+		memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
+		tsk->thread.i387.fxsave.cwd = 0x37f;
+		if (cpu_has_xmm)
+			tsk->thread.i387.fxsave.mxcsr = 0x1f80;
+	} else {
+		memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct));
+		tsk->thread.i387.fsave.cwd = 0xffff037fu;
+		tsk->thread.i387.fsave.swd = 0xffff0000u;
+		tsk->thread.i387.fsave.twd = 0xffffffffu;
+		tsk->thread.i387.fsave.fos = 0xffff0000u;
+	}
+	/* only the device not available exception or ptrace can call init_fpu */
+	set_stopped_child_used_math(tsk);
+}
+
+/*
+ * FPU lazy state save handling.
+ */
+
+void kernel_fpu_begin(void)
+{
+	struct thread_info *thread = current_thread_info();
+
+	preempt_disable();
+	if (thread->status & TS_USEDFPU) {
+		__save_init_fpu(thread->task);
+		return;
+	}
+	clts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		asm volatile( "fxrstor %0"
+			      : : "m" (tsk->thread.i387.fxsave) );
+	} else {
+		asm volatile( "frstor %0"
+			      : : "m" (tsk->thread.i387.fsave) );
+	}
+}
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+{
+	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+ 
+	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
+        tmp = ~twd;
+        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+        /* and move the valid bits to the lower byte. */
+        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+        return tmp;
+}
+
+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
+{
+	struct _fpxreg *st = NULL;
+	unsigned long tos = (fxsave->swd >> 11) & 7;
+	unsigned long twd = (unsigned long) fxsave->twd;
+	unsigned long tag;
+	unsigned long ret = 0xffff0000u;
+	int i;
+
+#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
+
+	for ( i = 0 ; i < 8 ; i++ ) {
+		if ( twd & 0x1 ) {
+			st = FPREG_ADDR( fxsave, (i - tos) & 7 );
+
+			switch ( st->exponent & 0x7fff ) {
+			case 0x7fff:
+				tag = 2;		/* Special */
+				break;
+			case 0x0000:
+				if ( !st->significand[0] &&
+				     !st->significand[1] &&
+				     !st->significand[2] &&
+				     !st->significand[3] ) {
+					tag = 1;	/* Zero */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			default:
+				if ( st->significand[3] & 0x8000 ) {
+					tag = 0;	/* Valid */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			}
+		} else {
+			tag = 3;			/* Empty */
+		}
+		ret |= (tag << (2 * i));
+		twd = twd >> 1;
+	}
+	return ret;
+}
+
+/*
+ * FPU state interaction.
+ */
+
+unsigned short get_fpu_cwd( struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		return tsk->thread.i387.fxsave.cwd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.cwd;
+	}
+}
+
+unsigned short get_fpu_swd( struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		return tsk->thread.i387.fxsave.swd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.swd;
+	}
+}
+
+#if 0
+unsigned short get_fpu_twd( struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		return tsk->thread.i387.fxsave.twd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.twd;
+	}
+}
+#endif  /*  0  */
+
+unsigned short get_fpu_mxcsr( struct task_struct *tsk )
+{
+	if ( cpu_has_xmm ) {
+		return tsk->thread.i387.fxsave.mxcsr;
+	} else {
+		return 0x1f80;
+	}
+}
+
+#if 0
+
+void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
+{
+	if ( cpu_has_fxsr ) {
+		tsk->thread.i387.fxsave.cwd = cwd;
+	} else {
+		tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
+	}
+}
+
+void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
+{
+	if ( cpu_has_fxsr ) {
+		tsk->thread.i387.fxsave.swd = swd;
+	} else {
+		tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
+	}
+}
+
+void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
+{
+	if ( cpu_has_fxsr ) {
+		tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
+	} else {
+		tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
+	}
+}
+
+#endif  /*  0  */
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+static int convert_fxsr_to_user( struct _fpstate __user *buf,
+					struct i387_fxsave_struct *fxsave )
+{
+	unsigned long env[7];
+	struct _fpreg __user *to;
+	struct _fpxreg *from;
+	int i;
+
+	env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
+	env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
+	env[2] = twd_fxsr_to_i387(fxsave);
+	env[3] = fxsave->fip;
+	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+	env[5] = fxsave->foo;
+	env[6] = fxsave->fos;
+
+	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
+		return 1;
+
+	to = &buf->_st[0];
+	from = (struct _fpxreg *) &fxsave->st_space[0];
+	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+		unsigned long __user *t = (unsigned long __user *)to;
+		unsigned long *f = (unsigned long *)from;
+
+		if (__put_user(*f, t) ||
+				__put_user(*(f + 1), t + 1) ||
+				__put_user(from->exponent, &to->exponent))
+			return 1;
+	}
+	return 0;
+}
+
+static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
+					  struct _fpstate __user *buf )
+{
+	unsigned long env[7];
+	struct _fpxreg *to;
+	struct _fpreg __user *from;
+	int i;
+
+	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
+		return 1;
+
+	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+	fxsave->swd = (unsigned short)(env[1] & 0xffff);
+	fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+	fxsave->fip = env[3];
+	fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
+	fxsave->fcs = (env[4] & 0xffff);
+	fxsave->foo = env[5];
+	fxsave->fos = env[6];
+
+	to = (struct _fpxreg *) &fxsave->st_space[0];
+	from = &buf->_st[0];
+	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+		unsigned long *t = (unsigned long *)to;
+		unsigned long __user *f = (unsigned long __user *)from;
+
+		if (__get_user(*t, f) ||
+				__get_user(*(t + 1), f + 1) ||
+				__get_user(to->exponent, &from->exponent))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387_fsave( struct _fpstate __user *buf )
+{
+	struct task_struct *tsk = current;
+
+	unlazy_fpu( tsk );
+	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+	if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+			     sizeof(struct i387_fsave_struct) ) )
+		return -1;
+	return 1;
+}
+
+static int save_i387_fxsave( struct _fpstate __user *buf )
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	unlazy_fpu( tsk );
+
+	if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+		return -1;
+
+	err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+	err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+	if ( err )
+		return -1;
+
+	if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+			     sizeof(struct i387_fxsave_struct) ) )
+		return -1;
+	return 1;
+}
+
+int save_i387( struct _fpstate __user *buf )
+{
+	if ( !used_math() )
+		return 0;
+
+	/* This will cause a "finit" to be triggered by the next
+	 * attempted FPU operation by the 'current' process.
+	 */
+	clear_used_math();
+
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return save_i387_fxsave( buf );
+		} else {
+			return save_i387_fsave( buf );
+		}
+	} else {
+		return save_i387_soft( &current->thread.i387.soft, buf );
+	}
+}
+
+static inline int restore_i387_fsave( struct _fpstate __user *buf )
+{
+	struct task_struct *tsk = current;
+	clear_fpu( tsk );
+	return __copy_from_user( &tsk->thread.i387.fsave, buf,
+				 sizeof(struct i387_fsave_struct) );
+}
+
+static int restore_i387_fxsave( struct _fpstate __user *buf )
+{
+	int err;
+	struct task_struct *tsk = current;
+	clear_fpu( tsk );
+	err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+				sizeof(struct i387_fxsave_struct) );
+	/* mxcsr reserved bits must be masked to zero for security reasons */
+	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate __user *buf )
+{
+	int err;
+
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			err = restore_i387_fxsave( buf );
+		} else {
+			err = restore_i387_fsave( buf );
+		}
+	} else {
+		err = restore_i387_soft( &current->thread.i387.soft, buf );
+	}
+	set_used_math();
+	return err;
+}
+
+/*
+ * ptrace request handlers.
+ */
+
+static inline int get_fpregs_fsave( struct user_i387_struct __user *buf,
+				    struct task_struct *tsk )
+{
+	return __copy_to_user( buf, &tsk->thread.i387.fsave,
+			       sizeof(struct user_i387_struct) );
+}
+
+static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf,
+				     struct task_struct *tsk )
+{
+	return convert_fxsr_to_user( (struct _fpstate __user *)buf,
+				     &tsk->thread.i387.fxsave );
+}
+
+int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk )
+{
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return get_fpregs_fxsave( buf, tsk );
+		} else {
+			return get_fpregs_fsave( buf, tsk );
+		}
+	} else {
+		return save_i387_soft( &tsk->thread.i387.soft,
+				       (struct _fpstate __user *)buf );
+	}
+}
+
+static inline int set_fpregs_fsave( struct task_struct *tsk,
+				    struct user_i387_struct __user *buf )
+{
+	return __copy_from_user( &tsk->thread.i387.fsave, buf,
+				 sizeof(struct user_i387_struct) );
+}
+
+static inline int set_fpregs_fxsave( struct task_struct *tsk,
+				     struct user_i387_struct __user *buf )
+{
+	return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
+				       (struct _fpstate __user *)buf );
+}
+
+int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf )
+{
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return set_fpregs_fxsave( tsk, buf );
+		} else {
+			return set_fpregs_fsave( tsk, buf );
+		}
+	} else {
+		return restore_i387_soft( &tsk->thread.i387.soft,
+					  (struct _fpstate __user *)buf );
+	}
+}
+
+int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		if (__copy_to_user( buf, &tsk->thread.i387.fxsave,
+				    sizeof(struct user_fxsr_struct) ))
+			return -EFAULT;
+		return 0;
+	} else {
+		return -EIO;
+	}
+}
+
+int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
+{
+	int ret = 0;
+
+	if ( cpu_has_fxsr ) {
+		if (__copy_from_user( &tsk->thread.i387.fxsave, buf,
+				  sizeof(struct user_fxsr_struct) ))
+			ret = -EFAULT;
+		/* mxcsr reserved bits must be masked to zero for security reasons */
+		tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	} else {
+		ret = -EIO;
+	}
+	return ret;
+}
+
+/*
+ * FPU state for core dumps.
+ */
+
+static inline void copy_fpu_fsave( struct task_struct *tsk,
+				   struct user_i387_struct *fpu )
+{
+	memcpy( fpu, &tsk->thread.i387.fsave,
+		sizeof(struct user_i387_struct) );
+}
+
+static inline void copy_fpu_fxsave( struct task_struct *tsk,
+				   struct user_i387_struct *fpu )
+{
+	unsigned short *to;
+	unsigned short *from;
+	int i;
+
+	memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
+
+	to = (unsigned short *)&fpu->st_space[0];
+	from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
+	for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
+		memcpy( to, from, 5 * sizeof(unsigned short) );
+	}
+}
+
+int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+{
+	int fpvalid;
+	struct task_struct *tsk = current;
+
+	fpvalid = !!used_math();
+	if ( fpvalid ) {
+		unlazy_fpu( tsk );
+		if ( cpu_has_fxsr ) {
+			copy_fpu_fxsave( tsk, fpu );
+		} else {
+			copy_fpu_fsave( tsk, fpu );
+		}
+	}
+
+	return fpvalid;
+}
+
+int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
+{
+	int fpvalid = !!tsk_used_math(tsk);
+
+	if (fpvalid) {
+		if (tsk == current)
+			unlazy_fpu(tsk);
+		if (cpu_has_fxsr)
+			copy_fpu_fxsave(tsk, fpu);
+		else
+			copy_fpu_fsave(tsk, fpu);
+	}
+	return fpvalid;
+}
+
+int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
+{
+	int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
+
+	if (fpvalid) {
+		if (tsk == current)
+		       unlazy_fpu(tsk);
+		memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
+	}
+	return fpvalid;
+}
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
new file mode 100644
index 00000000000..560bef1afb3
--- /dev/null
+++ b/arch/i386/kernel/i8259.c
@@ -0,0 +1,429 @@
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/bitops.h>
+
+#include <asm/8253pit.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/timer.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/apic.h>
+#include <asm/arch_hooks.h>
+#include <asm/i8259.h>
+
+#include <linux/irq.h>
+
+#include <io_ports.h>
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ * plus some generic x86 specific things if generic specifics makes
+ * any sense at all.
+ * this file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch independent land
+ */
+
+DEFINE_SPINLOCK(i8259A_lock);
+
+static void end_8259A_irq (unsigned int irq)
+{
+	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
+							irq_desc[irq].action)
+		enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq	disable_8259A_irq
+
+static void mask_and_ack_8259A(unsigned int);
+
+unsigned int startup_8259A_irq(unsigned int irq)
+{ 
+	enable_8259A_irq(irq);
+	return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+	.typename = "XT-PIC",
+	.startup = startup_8259A_irq,
+	.shutdown = shutdown_8259A_irq,
+	.enable = enable_8259A_irq,
+	.disable = disable_8259A_irq,
+	.ack = mask_and_ack_8259A,
+	.end = end_8259A_irq,
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+unsigned int cached_irq_mask = 0xffff;
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask |= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = ~(1 << irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask &= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+	unsigned int mask = 1<<irq;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	if (irq < 8)
+		ret = inb(PIC_MASTER_CMD) & mask;
+	else
+		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+	io_apic_irqs &= ~(1<<irq);
+	irq_desc[irq].handler = &i8259A_irq_type;
+	enable_irq(irq);
+}
+
+/*
+ * This function assumes to be called rarely. Switching between
+ * 8259A registers is slow.
+ * This has to be protected by the irq controller spinlock
+ * before being called.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+	int value;
+	int irqmask = 1<<irq;
+
+	if (irq < 8) {
+		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
+		value = inb(PIC_MASTER_CMD) & irqmask;
+		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
+		return value;
+	}
+	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
+	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
+	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
+	return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!
+ */
+static void mask_and_ack_8259A(unsigned int irq)
+{
+	unsigned int irqmask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	/*
+	 * Lightweight spurious IRQ detection. We do not want
+	 * to overdo spurious IRQ handling - it's usually a sign
+	 * of hardware problems, so we only do the checks we can
+	 * do without slowing down good hardware unnecesserily.
+	 *
+	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
+	 * usually resulting from the 8259A-1|2 PICs) occur
+	 * even if the IRQ is masked in the 8259A. Thus we
+	 * can check spurious 8259A IRQs without doing the
+	 * quite slow i8259A_irq_real() call for every IRQ.
+	 * This does not cover 100% of spurious interrupts,
+	 * but should be enough to warn the user that there
+	 * is something bad going on ...
+	 */
+	if (cached_irq_mask & irqmask)
+		goto spurious_8259A_irq;
+	cached_irq_mask |= irqmask;
+
+handle_real_irq:
+	if (irq & 8) {
+		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
+	} else {
+		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_master_mask, PIC_MASTER_IMR);
+		outb(0x60+irq,PIC_MASTER_CMD);	/* 'Specific EOI to master */
+	}
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+	return;
+
+spurious_8259A_irq:
+	/*
+	 * this is the slow path - should happen rarely.
+	 */
+	if (i8259A_irq_real(irq))
+		/*
+		 * oops, the IRQ _is_ in service according to the
+		 * 8259A - not spurious, go handle it.
+		 */
+		goto handle_real_irq;
+
+	{
+		static int spurious_irq_mask;
+		/*
+		 * At this point we can be sure the IRQ is spurious,
+		 * lets ACK and report it. [once per IRQ]
+		 */
+		if (!(spurious_irq_mask & irqmask)) {
+			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
+			spurious_irq_mask |= irqmask;
+		}
+		atomic_inc(&irq_err_count);
+		/*
+		 * Theoretically we do not have to handle this IRQ,
+		 * but in Linux this does not cause problems and is
+		 * simpler for us.
+		 */
+		goto handle_real_irq;
+	}
+}
+
+static char irq_trigger[2];
+/**
+ * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
+ */
+static void restore_ELCR(char *trigger)
+{
+	outb(trigger[0], 0x4d0);
+	outb(trigger[1], 0x4d1);
+}
+
+static void save_ELCR(char *trigger)
+{
+	/* IRQ 0,1,2,8,13 are marked as reserved */
+	trigger[0] = inb(0x4d0) & 0xF8;
+	trigger[1] = inb(0x4d1) & 0xDE;
+}
+
+static int i8259A_resume(struct sys_device *dev)
+{
+	init_8259A(0);
+	restore_ELCR(irq_trigger);
+	return 0;
+}
+
+static int i8259A_suspend(struct sys_device *dev, u32 state)
+{
+	save_ELCR(irq_trigger);
+	return 0;
+}
+
+static struct sysdev_class i8259_sysdev_class = {
+	set_kset_name("i8259"),
+	.suspend = i8259A_suspend,
+	.resume = i8259A_resume,
+};
+
+static struct sys_device device_i8259A = {
+	.id	= 0,
+	.cls	= &i8259_sysdev_class,
+};
+
+static int __init i8259A_init_sysfs(void)
+{
+	int error = sysdev_class_register(&i8259_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_i8259A);
+	return error;
+}
+
+device_initcall(i8259A_init_sysfs);
+
+void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	/*
+	 * outb_p - this has to work on a wide range of PC hardware.
+	 */
+	outb_p(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+	outb_p(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+	outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
+	if (auto_eoi)	/* master does Auto EOI */
+		outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+	else		/* master expects normal EOI */
+		outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+
+	outb_p(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+	outb_p(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+	outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
+	outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
+	if (auto_eoi)
+		/*
+		 * in AEOI mode we just have to mask the interrupt
+		 * when acking.
+		 */
+		i8259A_irq_type.ack = disable_8259A_irq;
+	else
+		i8259A_irq_type.ack = mask_and_ack_8259A;
+
+	udelay(100);		/* wait for 8259A to initialize */
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on an irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it is also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+ 
+
+static irqreturn_t math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
+{
+	extern void math_error(void __user *);
+	outb(0,0xF0);
+	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
+		return IRQ_NONE;
+	math_error((void __user *)regs->eip);
+	return IRQ_HANDLED;
+}
+
+/*
+ * New motherboards sometimes make IRQ 13 be a PCI interrupt,
+ * so allow interrupt sharing.
+ */
+static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
+
+void __init init_ISA_irqs (void)
+{
+	int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	init_bsp_APIC();
+#endif
+	init_8259A(0);
+
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc[i].status = IRQ_DISABLED;
+		irq_desc[i].action = NULL;
+		irq_desc[i].depth = 1;
+
+		if (i < 16) {
+			/*
+			 * 16 old-style INTA-cycle interrupts:
+			 */
+			irq_desc[i].handler = &i8259A_irq_type;
+		} else {
+			/*
+			 * 'high' PCI IRQs filled in on demand
+			 */
+			irq_desc[i].handler = &no_irq_type;
+		}
+	}
+}
+
+void __init init_IRQ(void)
+{
+	int i;
+
+	/* all the set up before the call gates are initialised */
+	pre_intr_init_hook();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+		int vector = FIRST_EXTERNAL_VECTOR + i;
+		if (i >= NR_IRQS)
+			break;
+		if (vector != SYSCALL_VECTOR) 
+			set_intr_gate(vector, interrupt[i]);
+	}
+
+	/* setup after call gates are initialised (usually add in
+	 * the architecture specific gates)
+	 */
+	intr_init_hook();
+
+	/*
+	 * Set the clock to HZ Hz, we already have a valid
+	 * vector now:
+	 */
+	setup_pit_timer();
+
+	/*
+	 * External FPU? Set up irq13 if so, for
+	 * original braindamaged IBM FERR coupling.
+	 */
+	if (boot_cpu_data.hard_math && !cpu_has_fpu)
+		setup_irq(FPU_IRQ, &fpu_irq);
+
+	irq_ctx_init(smp_processor_id());
+}
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
new file mode 100644
index 00000000000..9caa8e8db80
--- /dev/null
+++ b/arch/i386/kernel/init_task.c
@@ -0,0 +1,46 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+	__attribute__((__section__(".data.init_task"))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */ 
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
new file mode 100644
index 00000000000..9c1350e811d
--- /dev/null
+++ b/arch/i386/kernel/io_apic.c
@@ -0,0 +1,2545 @@
+/*
+ *	Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *	Many thanks to Stig Venaas for trying out countless experimental
+ *	patches and reporting/debugging problems patiently!
+ *
+ *	(c) 1999, Multiple IO-APIC support, developed by
+ *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *	further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *	and Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively
+ *	Paul Diefenbaugh	:	Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/config.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+
+#include <linux/sysdev.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/timer.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
+
+static DEFINE_SPINLOCK(ioapic_lock);
+
+/*
+ *	Is the SiS APIC rmw bug present ?
+ *	-1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+	int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector) 	\
+	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector)	(vector)
+#endif
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+	static int first_free_entry = NR_IRQS;
+	struct irq_pin_list *entry = irq_2_pin + irq;
+
+	while (entry->next)
+		entry = irq_2_pin + entry->next;
+
+	if (entry->pin != -1) {
+		entry->next = first_free_entry;
+		entry = irq_2_pin + entry->next;
+		if (++first_free_entry >= PIN_MAP_SIZE)
+			panic("io_apic.c: whoops");
+	}
+	entry->apic = apic;
+	entry->pin = pin;
+}
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+				      int oldapic, int oldpin,
+				      int newapic, int newpin)
+{
+	struct irq_pin_list *entry = irq_2_pin + irq;
+
+	while (1) {
+		if (entry->apic == oldapic && entry->pin == oldpin) {
+			entry->apic = newapic;
+			entry->pin = newpin;
+		}
+		if (!entry->next)
+			break;
+		entry = irq_2_pin + entry->next;
+	}
+}
+
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
+{
+	struct irq_pin_list *entry = irq_2_pin + irq;
+	unsigned int pin, reg;
+
+	for (;;) {
+		pin = entry->pin;
+		if (pin == -1)
+			break;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		reg &= ~disable;
+		reg |= enable;
+		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = irq_2_pin + entry->next;
+	}
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0x00010000, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0, 0x00010000);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+}
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__mask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__unmask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+	
+	/* Check delivery_mode to be sure we're not clearing an SMI pin */
+	spin_lock_irqsave(&ioapic_lock, flags);
+	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	if (entry.delivery_mode == dest_SMI)
+		return;
+
+	/*
+	 * Disable it in the IO-APIC irq-routing table:
+	 */
+	memset(&entry, 0, sizeof(entry));
+	entry.mask = 1;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			clear_IO_APIC_pin(apic, pin);
+}
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+{
+	unsigned long flags;
+	int pin;
+	struct irq_pin_list *entry = irq_2_pin + irq;
+	unsigned int apicid_value;
+	
+	apicid_value = cpu_mask_to_apicid(cpumask);
+	/* Prepare to do the io_apic_write */
+	apicid_value = apicid_value << 24;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	for (;;) {
+		pin = entry->pin;
+		if (pin == -1)
+			break;
+		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+		if (!entry->next)
+			break;
+		entry = irq_2_pin + entry->next;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h>	/* kernel_thread() */
+# include <linux/kernel_stat.h>	/* kstat */
+# include <linux/slab.h>		/* kmalloc() */
+# include <linux/timer.h>	/* time_after() */
+ 
+# ifdef CONFIG_BALANCED_IRQ_DEBUG
+#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
+#  define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+#  define TDprintk(x...) 
+#  define Dprintk(x...) 
+# endif
+
+cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
+
+#define IRQBALANCE_CHECK_ARCH -999
+static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
+static int physical_balance = 0;
+
+static struct irq_cpu_info {
+	unsigned long * last_irq;
+	unsigned long * irq_delta;
+	unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
+#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
+#define BALANCED_IRQ_LESS_DELTA		(HZ)
+
+static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+			unsigned long now, int direction)
+{
+	int search_idle = 1;
+	int cpu = curr_cpu;
+
+	goto inside;
+
+	do {
+		if (unlikely(cpu == curr_cpu))
+			search_idle = 0;
+inside:
+		if (direction == 1) {
+			cpu++;
+			if (cpu >= NR_CPUS)
+				cpu = 0;
+		} else {
+			cpu--;
+			if (cpu == -1)
+				cpu = NR_CPUS-1;
+		}
+	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+			(search_idle && !IDLE_ENOUGH(cpu,now)));
+
+	return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+	unsigned long now = jiffies;
+	cpumask_t allowed_mask;
+	unsigned int new_cpu;
+		
+	if (irqbalance_disabled)
+		return; 
+
+	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+	new_cpu = move(cpu, allowed_mask, now, 1);
+	if (cpu != new_cpu) {
+		irq_desc_t *desc = irq_desc + irq;
+		unsigned long flags;
+
+		spin_lock_irqsave(&desc->lock, flags);
+		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
+		spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+	int i, j;
+	Dprintk("Rotating IRQs among CPUs.\n");
+	for (i = 0; i < NR_CPUS; i++) {
+		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+			if (!irq_desc[j].action)
+				continue;
+			/* Is it a significant load ?  */
+			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+						useful_load_threshold)
+				continue;
+			balance_irq(i, j);
+		}
+	}
+	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
+	return;
+}
+
+static void do_irq_balance(void)
+{
+	int i, j;
+	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+	unsigned long move_this_load = 0;
+	int max_loaded = 0, min_loaded = 0;
+	int load;
+	unsigned long useful_load_threshold = balanced_irq_interval + 10;
+	int selected_irq;
+	int tmp_loaded, first_attempt = 1;
+	unsigned long tmp_cpu_irq;
+	unsigned long imbalance = 0;
+	cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		int package_index;
+		CPU_IRQ(i) = 0;
+		if (!cpu_online(i))
+			continue;
+		package_index = CPU_TO_PACKAGEINDEX(i);
+		for (j = 0; j < NR_IRQS; j++) {
+			unsigned long value_now, delta;
+			/* Is this an active IRQ? */
+			if (!irq_desc[j].action)
+				continue;
+			if ( package_index == i )
+				IRQ_DELTA(package_index,j) = 0;
+			/* Determine the total count per processor per IRQ */
+			value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+			/* Determine the activity per processor per IRQ */
+			delta = value_now - LAST_CPU_IRQ(i,j);
+
+			/* Update last_cpu_irq[][] for the next time */
+			LAST_CPU_IRQ(i,j) = value_now;
+
+			/* Ignore IRQs whose rate is less than the clock */
+			if (delta < useful_load_threshold)
+				continue;
+			/* update the load for the processor or package total */
+			IRQ_DELTA(package_index,j) += delta;
+
+			/* Keep track of the higher numbered sibling as well */
+			if (i != package_index)
+				CPU_IRQ(i) += delta;
+			/*
+			 * We have sibling A and sibling B in the package
+			 *
+			 * cpu_irq[A] = load for cpu A + load for cpu B
+			 * cpu_irq[B] = load for cpu B
+			 */
+			CPU_IRQ(package_index) += delta;
+		}
+	}
+	/* Find the least loaded processor package */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		if (i != CPU_TO_PACKAGEINDEX(i))
+			continue;
+		if (min_cpu_irq > CPU_IRQ(i)) {
+			min_cpu_irq = CPU_IRQ(i);
+			min_loaded = i;
+		}
+	}
+	max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+	/* Look for heaviest loaded processor.
+	 * We may come back to get the next heaviest loaded processor.
+	 * Skip processors with trivial loads.
+	 */
+	tmp_cpu_irq = 0;
+	tmp_loaded = -1;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		if (i != CPU_TO_PACKAGEINDEX(i))
+			continue;
+		if (max_cpu_irq <= CPU_IRQ(i)) 
+			continue;
+		if (tmp_cpu_irq < CPU_IRQ(i)) {
+			tmp_cpu_irq = CPU_IRQ(i);
+			tmp_loaded = i;
+		}
+	}
+
+	if (tmp_loaded == -1) {
+ 	 /* In the case of small number of heavy interrupt sources, 
+	  * loading some of the cpus too much. We use Ingo's original 
+	  * approach to rotate them around.
+	  */
+		if (!first_attempt && imbalance >= useful_load_threshold) {
+			rotate_irqs_among_cpus(useful_load_threshold);
+			return;
+		}
+		goto not_worth_the_effort;
+	}
+	
+	first_attempt = 0;		/* heaviest search */
+	max_cpu_irq = tmp_cpu_irq;	/* load */
+	max_loaded = tmp_loaded;	/* processor */
+	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+	
+	Dprintk("max_loaded cpu = %d\n", max_loaded);
+	Dprintk("min_loaded cpu = %d\n", min_loaded);
+	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+	Dprintk("load imbalance = %lu\n", imbalance);
+
+	/* if imbalance is less than approx 10% of max load, then
+	 * observe diminishing returns action. - quit
+	 */
+	if (imbalance < (max_cpu_irq >> 3)) {
+		Dprintk("Imbalance too trivial\n");
+		goto not_worth_the_effort;
+	}
+
+tryanotherirq:
+	/* if we select an IRQ to move that can't go where we want, then
+	 * see if there is another one to try.
+	 */
+	move_this_load = 0;
+	selected_irq = -1;
+	for (j = 0; j < NR_IRQS; j++) {
+		/* Is this an active IRQ? */
+		if (!irq_desc[j].action)
+			continue;
+		if (imbalance <= IRQ_DELTA(max_loaded,j))
+			continue;
+		/* Try to find the IRQ that is closest to the imbalance
+		 * without going over.
+		 */
+		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+			move_this_load = IRQ_DELTA(max_loaded,j);
+			selected_irq = j;
+		}
+	}
+	if (selected_irq == -1) {
+		goto tryanothercpu;
+	}
+
+	imbalance = move_this_load;
+	
+	/* For physical_balance case, we accumlated both load
+	 * values in the one of the siblings cpu_irq[],
+	 * to use the same code for physical and logical processors
+	 * as much as possible. 
+	 *
+	 * NOTE: the cpu_irq[] array holds the sum of the load for
+	 * sibling A and sibling B in the slot for the lowest numbered
+	 * sibling (A), _AND_ the load for sibling B in the slot for
+	 * the higher numbered sibling.
+	 *
+	 * We seek the least loaded sibling by making the comparison
+	 * (A+B)/2 vs B
+	 */
+	load = CPU_IRQ(min_loaded) >> 1;
+	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+		if (load > CPU_IRQ(j)) {
+			/* This won't change cpu_sibling_map[min_loaded] */
+			load = CPU_IRQ(j);
+			min_loaded = j;
+		}
+	}
+
+	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+	target_cpu_mask = cpumask_of_cpu(min_loaded);
+	cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+	if (!cpus_empty(tmp)) {
+		irq_desc_t *desc = irq_desc + selected_irq;
+		unsigned long flags;
+
+		Dprintk("irq = %d moved to cpu = %d\n",
+				selected_irq, min_loaded);
+		/* mark for change destination */
+		spin_lock_irqsave(&desc->lock, flags);
+		pending_irq_balance_cpumask[selected_irq] =
+					cpumask_of_cpu(min_loaded);
+		spin_unlock_irqrestore(&desc->lock, flags);
+		/* Since we made a change, come back sooner to 
+		 * check for more variation.
+		 */
+		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
+		return;
+	}
+	goto tryanotherirq;
+
+not_worth_the_effort:
+	/*
+	 * if we did not find an IRQ to move, then adjust the time interval
+	 * upward
+	 */
+	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
+	Dprintk("IRQ worth rotating not found\n");
+	return;
+}
+
+static int balanced_irq(void *unused)
+{
+	int i;
+	unsigned long prev_balance_time = jiffies;
+	long time_remaining = balanced_irq_interval;
+
+	daemonize("kirqd");
+	
+	/* push everything to CPU 0 to give us a starting point.  */
+	for (i = 0 ; i < NR_IRQS ; i++) {
+		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
+	}
+
+	for ( ; ; ) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		time_remaining = schedule_timeout(time_remaining);
+		try_to_freeze(PF_FREEZE);
+		if (time_after(jiffies,
+				prev_balance_time+balanced_irq_interval)) {
+			do_irq_balance();
+			prev_balance_time = jiffies;
+			time_remaining = balanced_irq_interval;
+		}
+	}
+	return 0;
+}
+
+static int __init balanced_irq_init(void)
+{
+	int i;
+	struct cpuinfo_x86 *c;
+	cpumask_t tmp;
+
+	cpus_shift_right(tmp, cpu_online_map, 2);
+        c = &boot_cpu_data;
+	/* When not overwritten by the command line ask subarchitecture. */
+	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+		irqbalance_disabled = NO_BALANCE_IRQ;
+	if (irqbalance_disabled)
+		return 0;
+	
+	 /* disable irqbalance completely if there is only one processor online */
+	if (num_online_cpus() < 2) {
+		irqbalance_disabled = 1;
+		return 0;
+	}
+	/*
+	 * Enable physical balance only if more than 1 physical processor
+	 * is present
+	 */
+	if (smp_num_siblings > 1 && !cpus_empty(tmp))
+		physical_balance = 1;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
+			printk(KERN_ERR "balanced_irq_init: out of memory");
+			goto failed;
+		}
+		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
+		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
+	}
+	
+	printk(KERN_INFO "Starting balanced_irq\n");
+	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+		return 0;
+	else 
+		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+failed:
+	for (i = 0; i < NR_CPUS; i++) {
+		if(irq_cpu_data[i].irq_delta)
+			kfree(irq_cpu_data[i].irq_delta);
+		if(irq_cpu_data[i].last_irq)
+			kfree(irq_cpu_data[i].last_irq);
+	}
+	return 0;
+}
+
+int __init irqbalance_disable(char *str)
+{
+	irqbalance_disabled = 1;
+	return 0;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+static inline void move_irq(int irq)
+{
+	/* note - we hold the desc->lock */
+	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
+		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
+		cpus_clear(pending_irq_balance_cpumask[irq]);
+	}
+}
+
+late_initcall(balanced_irq_init);
+
+#else /* !CONFIG_IRQBALANCE */
+static inline void move_irq(int irq) { }
+#endif /* CONFIG_IRQBALANCE */
+
+#ifndef CONFIG_SMP
+void fastcall send_IPI_self(int vector)
+{
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP */
+
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+int skip_ioapic_setup;
+
+static int __init ioapic_setup(char *str)
+{
+	skip_ioapic_setup = 1;
+	return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mpc_irqtype == type &&
+		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].mpc_dstirq == pin)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int find_isa_irq_pin(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mpc_srcbus;
+
+		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+		    ) &&
+		    (mp_irqs[i].mpc_irqtype == type) &&
+		    (mp_irqs[i].mpc_srcbusirq == irq))
+
+			return mp_irqs[i].mpc_dstirq;
+	}
+	return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+		"slot:%d, pin:%d.\n", bus, slot, pin);
+	if (mp_bus_id_to_pci_bus[bus] == -1) {
+		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mpc_srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+				break;
+
+		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+		    !mp_irqs[i].mpc_irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+				return irq;
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0)
+				best_guess = irq;
+		}
+	}
+	return best_guess;
+}
+
+/*
+ * This function currently is only a helper for the i386 smp boot process where 
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+			if (irq_entry == -1)
+				continue;
+			irq = pin_2_irq(irq_entry, ioapic, pin);
+			set_ioapic_affinity_irq(irq, TARGET_CPUS);
+		}
+
+	}
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx)	(0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)	(0)
+#define default_ISA_polarity(idx)	(0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)	(1)
+#define default_PCI_polarity(idx)	(1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)	(1)
+#define default_MCA_polarity(idx)	(0)
+
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx)     (0)
+#define default_NEC98_polarity(idx)    (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+	int bus = mp_irqs[idx].mpc_srcbus;
+	int polarity;
+
+	/*
+	 * Determine IRQ line polarity (high active or low active):
+	 */
+	switch (mp_irqs[idx].mpc_irqflag & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent polarity */
+		{
+			switch (mp_bus_id_to_type[bus])
+			{
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					polarity = default_ISA_polarity(idx);
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					polarity = default_EISA_polarity(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					polarity = default_PCI_polarity(idx);
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					polarity = default_MCA_polarity(idx);
+					break;
+				}
+				case MP_BUS_NEC98: /* NEC 98 pin */
+				{
+					polarity = default_NEC98_polarity(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					polarity = 1;
+					break;
+				}
+			}
+			break;
+		}
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+	}
+	return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+	int bus = mp_irqs[idx].mpc_srcbus;
+	int trigger;
+
+	/*
+	 * Determine IRQ trigger mode (edge or level sensitive):
+	 */
+	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent */
+		{
+			switch (mp_bus_id_to_type[bus])
+			{
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					trigger = default_ISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					trigger = default_PCI_trigger(idx);
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				case MP_BUS_NEC98: /* NEC 98 pin */
+				{
+					trigger = default_NEC98_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+			break;
+		}
+		case 1: /* edge */
+		{
+			trigger = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 1;
+			break;
+		}
+		case 3: /* level */
+		{
+			trigger = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 0;
+			break;
+		}
+	}
+	return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+	return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+	return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+	int irq, i;
+	int bus = mp_irqs[idx].mpc_srcbus;
+
+	/*
+	 * Debugging check, we are in big trouble if this message pops up!
+	 */
+	if (mp_irqs[idx].mpc_dstirq != pin)
+		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+	switch (mp_bus_id_to_type[bus])
+	{
+		case MP_BUS_ISA: /* ISA pin */
+		case MP_BUS_EISA:
+		case MP_BUS_MCA:
+		case MP_BUS_NEC98:
+		{
+			irq = mp_irqs[idx].mpc_srcbusirq;
+			break;
+		}
+		case MP_BUS_PCI: /* PCI pin */
+		{
+			/*
+			 * PCI IRQs are mapped in order
+			 */
+			i = irq = 0;
+			while (i < apic)
+				irq += nr_ioapic_registers[i++];
+			irq += pin;
+
+			/*
+			 * For MPS mode, so far only needed by ES7000 platform
+			 */
+			if (ioapic_renumber_irq)
+				irq = ioapic_renumber_irq(apic, irq);
+
+			break;
+		}
+		default:
+		{
+			printk(KERN_ERR "unknown bus type %d.\n",bus); 
+			irq = 0;
+			break;
+		}
+	}
+
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+	return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic,pin,mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
+	 * nonexistent IRQs are edge default
+	 */
+	return 0;
+}
+
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+
+int assign_irq_vector(int irq)
+{
+	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+
+	BUG_ON(irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+		return IO_APIC_VECTOR(irq);
+next:
+	current_vector += 8;
+	if (current_vector == SYSCALL_VECTOR)
+		goto next;
+
+	if (current_vector >= FIRST_SYSTEM_VECTOR) {
+		offset++;
+		if (!(offset%8))
+			return -ENOSPC;
+		current_vector = FIRST_DEVICE_VECTOR + offset;
+	}
+
+	vector_irq[current_vector] = irq;
+	if (irq != AUTO_ASSIGN)
+		IO_APIC_VECTOR(irq) = current_vector;
+
+	return current_vector;
+}
+
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO	-1
+#define IOAPIC_EDGE	0
+#define IOAPIC_LEVEL	1
+
+static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+	if (use_pci_vector() && !platform_legacy_irq(irq)) {
+		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+				trigger == IOAPIC_LEVEL)
+			irq_desc[vector].handler = &ioapic_level_type;
+		else
+			irq_desc[vector].handler = &ioapic_edge_type;
+		set_intr_gate(vector, interrupt[vector]);
+	} else	{
+		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+				trigger == IOAPIC_LEVEL)
+			irq_desc[irq].handler = &ioapic_level_type;
+		else
+			irq_desc[irq].handler = &ioapic_edge_type;
+		set_intr_gate(vector, interrupt[irq]);
+	}
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	struct IO_APIC_route_entry entry;
+	int apic, pin, idx, irq, first_notcon = 1, vector;
+	unsigned long flags;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+		/*
+		 * add it to the IO-APIC irq-routing table:
+		 */
+		memset(&entry,0,sizeof(entry));
+
+		entry.delivery_mode = INT_DELIVERY_MODE;
+		entry.dest_mode = INT_DEST_MODE;
+		entry.mask = 0;				/* enable IRQ */
+		entry.dest.logical.logical_dest = 
+					cpu_mask_to_apicid(TARGET_CPUS);
+
+		idx = find_irq_entry(apic,pin,mp_INT);
+		if (idx == -1) {
+			if (first_notcon) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						" IO-APIC (apicid-pin) %d-%d",
+						mp_ioapics[apic].mpc_apicid,
+						pin);
+				first_notcon = 0;
+			} else
+				apic_printk(APIC_VERBOSE, ", %d-%d",
+					mp_ioapics[apic].mpc_apicid, pin);
+			continue;
+		}
+
+		entry.trigger = irq_trigger(idx);
+		entry.polarity = irq_polarity(idx);
+
+		if (irq_trigger(idx)) {
+			entry.trigger = 1;
+			entry.mask = 1;
+		}
+
+		irq = pin_2_irq(idx, apic, pin);
+		/*
+		 * skip adding the timer int on secondary nodes, which causes
+		 * a small but painful rift in the time-space continuum
+		 */
+		if (multi_timer_check(apic, irq))
+			continue;
+		else
+			add_pin_to_irq(irq, apic, pin);
+
+		if (!apic && !IO_APIC_IRQ(irq))
+			continue;
+
+		if (IO_APIC_IRQ(irq)) {
+			vector = assign_irq_vector(irq);
+			entry.vector = vector;
+			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		
+			if (!apic && (irq < 16))
+				disable_8259A_irq(irq);
+		}
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+	}
+
+	if (!first_notcon)
+		apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin:
+ */
+static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+
+	memset(&entry,0,sizeof(entry));
+
+	disable_8259A_irq(0);
+
+	/* mask LVT0 */
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+	/*
+	 * We use logical delivery to get the timer IRQ
+	 * to the first CPU.
+	 */
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 0;					/* unmask IRQ now */
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.polarity = 0;
+	entry.trigger = 0;
+	entry.vector = vector;
+
+	/*
+	 * The timer IRQ doesn't have to know that behind the
+	 * scene we have a 8259A-master in AEOI mode ...
+	 */
+	irq_desc[0].handler = &ioapic_edge_type;
+
+	/*
+	 * Add it to the IO-APIC irq-routing table:
+	 */
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+	io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	enable_8259A_irq(0);
+}
+
+static inline void UNEXPECTED_IO_APIC(void)
+{
+}
+
+void __init print_IO_APIC(void)
+{
+	int apic, i;
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
+	unsigned long flags;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+ 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (i = 0; i < nr_ioapics; i++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(apic, 0);
+	reg_01.raw = io_apic_read(apic, 1);
+	if (reg_01.bits.version >= 0x10)
+		reg_02.raw = io_apic_read(apic, 2);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(apic, 3);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+	if (reg_00.bits.ID >= get_physical_broadcast())
+		UNEXPECTED_IO_APIC();
+	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+		UNEXPECTED_IO_APIC();
+
+	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
+	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+		(reg_01.bits.entries != 0x2E) &&
+		(reg_01.bits.entries != 0x3F)
+	)
+		UNEXPECTED_IO_APIC();
+
+	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
+	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
+	)
+		UNEXPECTED_IO_APIC();
+	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+		UNEXPECTED_IO_APIC();
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+			UNEXPECTED_IO_APIC();
+	}
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+		if (reg_03.bits.__reserved_1)
+			UNEXPECTED_IO_APIC();
+	}
+
+	printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+			  " Stat Dest Deli Vect:   \n");
+
+	for (i = 0; i <= reg_01.bits.entries; i++) {
+		struct IO_APIC_route_entry entry;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		printk(KERN_DEBUG " %02x %03X %02X  ",
+			i,
+			entry.dest.logical.logical_dest,
+			entry.dest.physical.physical_dest
+		);
+
+		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+			entry.mask,
+			entry.trigger,
+			entry.irr,
+			entry.polarity,
+			entry.delivery_status,
+			entry.dest_mode,
+			entry.delivery_mode,
+			entry.vector
+		);
+	}
+	}
+	if (use_pci_vector())
+		printk(KERN_INFO "Using vector-based indexing\n");
+	printk(KERN_DEBUG "IRQ to pin mappings:\n");
+	for (i = 0; i < NR_IRQS; i++) {
+		struct irq_pin_list *entry = irq_2_pin + i;
+		if (entry->pin < 0)
+			continue;
+ 		if (use_pci_vector() && !platform_legacy_irq(i))
+			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+		else
+			printk(KERN_DEBUG "IRQ%d ", i);
+		for (;;) {
+			printk("-> %d:%d", entry->apic, entry->pin);
+			if (!entry->next)
+				break;
+			entry = irq_2_pin + entry->next;
+		}
+		printk("\n");
+	}
+
+	printk(KERN_INFO ".................................... done.\n");
+
+	return;
+}
+
+#if 0
+
+static void print_APIC_bitfield (int base)
+{
+	unsigned int v;
+	int i, j;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+	for (i = 0; i < 8; i++) {
+		v = apic_read(base + i*0x10);
+		for (j = 0; j < 32; j++) {
+			if (v & (1<<j))
+				printk("1");
+			else
+				printk("0");
+		}
+		printk("\n");
+	}
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+	unsigned int v, ver, maxlvt;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+		smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+	v = apic_read(APIC_LVR);
+	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
+		v = apic_read(APIC_ARBPRI);
+		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			v & APIC_ARBPRI_MASK);
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
+
+	v = apic_read(APIC_EOI);
+	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+	v = apic_read(APIC_RRR);
+	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	v = apic_read(APIC_LDR);
+	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+	v = apic_read(APIC_DFR);
+	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	v = apic_read(APIC_SPIV);
+	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+	printk(KERN_DEBUG "... APIC ISR field:\n");
+	print_APIC_bitfield(APIC_ISR);
+	printk(KERN_DEBUG "... APIC TMR field:\n");
+	print_APIC_bitfield(APIC_TMR);
+	printk(KERN_DEBUG "... APIC IRR field:\n");
+	print_APIC_bitfield(APIC_IRR);
+
+	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_ICR);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+	v = apic_read(APIC_ICR2);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+	v = apic_read(APIC_LVTT);
+	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {                       /* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {			/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+	printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+	on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+void /*__init*/ print_PIC(void)
+{
+	extern spinlock_t i8259A_lock;
+	unsigned int v;
+	unsigned long flags;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+	outb(0x0b,0xa0);
+	outb(0x0b,0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a,0xa0);
+	outb(0x0a,0x20);
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+#endif  /*  0  */
+
+static void __init enable_IO_APIC(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	int i;
+	unsigned long flags;
+
+	for (i = 0; i < PIN_MAP_SIZE; i++) {
+		irq_2_pin[i].pin = -1;
+		irq_2_pin[i].next = 0;
+	}
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (i = 0; i < nr_ioapics; i++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(i, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[i] = reg_01.bits.entries+1;
+	}
+
+	/*
+	 * Do not trust the IO-APIC being empty at bootup
+	 */
+	clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+	/*
+	 * Clear the IO-APIC before rebooting:
+	 */
+	clear_IO_APIC();
+
+	disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+#ifndef CONFIG_X86_NUMAQ
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int apic;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+		/* Read the register 0 value */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		
+		old_id = mp_ioapics[apic].mpc_apicid;
+
+		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				apic, mp_ioapics[apic].mpc_apicid);
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+		}
+
+		/* Don't check I/O APIC IDs for some xAPIC systems.  They have
+		 * no meaning without the serial APIC bus. */
+		if (NO_IOAPIC_CHECK)
+			continue;
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (check_apicid_used(phys_id_present_map,
+					mp_ioapics[apic].mpc_apicid)) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				apic, mp_ioapics[apic].mpc_apicid);
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			mp_ioapics[apic].mpc_apicid = i;
+		} else {
+			physid_mask_t tmp;
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mp_ioapics[apic].mpc_apicid);
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mp_ioapics[apic].mpc_apicid)
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].mpc_dstapic == old_id)
+					mp_irqs[i].mpc_dstapic
+						= mp_ioapics[apic].mpc_apicid;
+
+		/*
+		 * Read the right value from the MPC table and
+		 * write it into the ID register.
+	 	 */
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mp_ioapics[apic].mpc_apicid);
+
+		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0, reg_00.raw);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+			printk("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *	- timer IRQ defaults to IO-APIC IRQ
+ *	- if this function detects that timer IRQs are defunct, then we fall
+ *	  back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+	unsigned long t1 = jiffies;
+
+	local_irq_enable();
+	/* Let ten ticks pass... */
+	mdelay((10 * 1000) / HZ);
+
+	/*
+	 * Expect a few ticks at least, to be sure some possible
+	 * glue logic does not lock up after one or two first
+	 * ticks in a non-ExtINT mode.  Also the local APIC
+	 * might have cached one ExtINT interrupt.  Finally, at
+	 * least one tick may be lost due to delays.
+	 */
+	if (jiffies - t1 > 4)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+	int was_pending = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	if (irq < 16) {
+		disable_8259A_irq(irq);
+		if (i8259A_irq_pending(irq))
+			was_pending = 1;
+	}
+	__unmask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+	move_irq(irq);
+	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+					== (IRQ_PENDING | IRQ_DISABLED))
+		mask_IO_APIC_irq(irq);
+	ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+	unmask_IO_APIC_irq(irq);
+
+	return 0; /* don't check for pending */
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	move_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+	i = IO_APIC_VECTOR(irq);
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+
+#ifdef CONFIG_PCI_MSI
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+	int irq = vector_to_irq(vector);
+
+	unmask_IO_APIC_irq(irq);
+}
+
+static void set_ioapic_affinity_vector (unsigned int vector,
+					cpumask_t cpu_mask)
+{
+	int irq = vector_to_irq(vector);
+
+	set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+static struct hw_interrupt_type ioapic_edge_type = {
+	.typename 	= "IO-APIC-edge",
+	.startup 	= startup_edge_ioapic,
+	.shutdown 	= shutdown_edge_ioapic,
+	.enable 	= enable_edge_ioapic,
+	.disable 	= disable_edge_ioapic,
+	.ack 		= ack_edge_ioapic,
+	.end 		= end_edge_ioapic,
+	.set_affinity 	= set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_type = {
+	.typename 	= "IO-APIC-level",
+	.startup 	= startup_level_ioapic,
+	.shutdown 	= shutdown_level_ioapic,
+	.enable 	= enable_level_ioapic,
+	.disable 	= disable_level_ioapic,
+	.ack 		= mask_and_ack_level_ioapic,
+	.end 		= end_level_ioapic,
+	.set_affinity 	= set_ioapic_affinity,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+	int irq;
+
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	for (irq = 0; irq < NR_IRQS ; irq++) {
+		int tmp = irq;
+		if (use_pci_vector()) {
+			if (!platform_legacy_irq(tmp))
+				if ((tmp = vector_to_irq(tmp)) == -1)
+					continue;
+		}
+		if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+			/*
+			 * Hmm.. We don't have an entry for this,
+			 * so default to an old-fashioned 8259
+			 * interrupt if we can..
+			 */
+			if (irq < 16)
+				make_8259A_irq(irq);
+			else
+				/* Strange. Oh, well.. */
+				irq_desc[irq].handler = &no_irq_type;
+		}
+	}
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+	.typename 	= "local-APIC-edge",
+	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
+	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
+	.enable 	= enable_lapic_irq,
+	.disable 	= disable_lapic_irq,
+	.ack 		= ack_lapic_irq,
+	.end 		= end_lapic_irq
+};
+
+static void setup_nmi (void)
+{
+	/*
+ 	 * Dirty trick to enable the NMI watchdog ...
+	 * We put the 8259A master into AEOI mode and
+	 * unmask on all local APICs LVT0 as NMI.
+	 *
+	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+	 * is from Maciej W. Rozycki - so we do not have to EOI from
+	 * the NMI handler or the timer interrupt.
+	 */ 
+	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+
+	apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+	int pin, i;
+	struct IO_APIC_route_entry entry0, entry1;
+	unsigned char save_control, save_freq_select;
+	unsigned long flags;
+
+	pin = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1)
+		return;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	*(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+	*(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	clear_IO_APIC_pin(0, pin);
+
+	memset(&entry1, 0, sizeof(entry1));
+
+	entry1.dest_mode = 0;			/* physical delivery */
+	entry1.mask = 0;			/* unmask IRQ now */
+	entry1.dest.physical.physical_dest = hard_smp_processor_id();
+	entry1.delivery_mode = dest_ExtINT;
+	entry1.polarity = entry0.polarity;
+	entry1.trigger = 0;
+	entry1.vector = 0;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	save_control = CMOS_READ(RTC_CONTROL);
+	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+		   RTC_FREQ_SELECT);
+	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+	i = 100;
+	while (i-- > 0) {
+		mdelay(10);
+		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+			i -= 10;
+	}
+
+	CMOS_WRITE(save_control, RTC_CONTROL);
+	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+	clear_IO_APIC_pin(0, pin);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+	int pin1, pin2;
+	int vector;
+
+	/*
+	 * get/set the timer IRQ vector:
+	 */
+	disable_8259A_irq(0);
+	vector = assign_irq_vector(0);
+	set_intr_gate(vector, interrupt[0]);
+
+	/*
+	 * Subtle, code in do_timer_interrupt() expects an AEOI
+	 * mode for the 8259A whenever interrupts are routed
+	 * through I/O APICs.  Also IRQ0 has to be enabled in
+	 * the 8259A which implies the virtual wire has to be
+	 * disabled in the local APIC.
+	 */
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+	init_8259A(1);
+	timer_ack = 1;
+	enable_8259A_irq(0);
+
+	pin1 = find_isa_irq_pin(0, mp_INT);
+	pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+	printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+	if (pin1 != -1) {
+		/*
+		 * Ok, does IRQ0 through the IOAPIC work?
+		 */
+		unmask_IO_APIC_irq(0);
+		if (timer_irq_works()) {
+			if (nmi_watchdog == NMI_IO_APIC) {
+				disable_8259A_irq(0);
+				setup_nmi();
+				enable_8259A_irq(0);
+				check_nmi_watchdog();
+			}
+			return;
+		}
+		clear_IO_APIC_pin(0, pin1);
+		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+	}
+
+	printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+	if (pin2 != -1) {
+		printk("\n..... (found pin %d) ...", pin2);
+		/*
+		 * legacy devices should be connected to IO APIC #0
+		 */
+		setup_ExtINT_IRQ0_pin(pin2, vector);
+		if (timer_irq_works()) {
+			printk("works.\n");
+			if (pin1 != -1)
+				replace_pin_at_irq(0, 0, pin1, 0, pin2);
+			else
+				add_pin_to_irq(0, 0, pin2);
+			if (nmi_watchdog == NMI_IO_APIC) {
+				setup_nmi();
+				check_nmi_watchdog();
+			}
+			return;
+		}
+		/*
+		 * Cleanup, just in case ...
+		 */
+		clear_IO_APIC_pin(0, pin2);
+	}
+	printk(" failed.\n");
+
+	if (nmi_watchdog == NMI_IO_APIC) {
+		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+		nmi_watchdog = 0;
+	}
+
+	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+	disable_8259A_irq(0);
+	irq_desc[0].handler = &lapic_irq_type;
+	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
+	enable_8259A_irq(0);
+
+	if (timer_irq_works()) {
+		printk(" works.\n");
+		return;
+	}
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+	printk(" failed.\n");
+
+	printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+	timer_ack = 0;
+	init_8259A(0);
+	make_8259A_irq(0);
+	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+	unlock_ExtINT_logic();
+
+	if (timer_irq_works()) {
+		printk(" works.\n");
+		return;
+	}
+	printk(" failed :(.\n");
+	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+		"report.  Then try booting with the 'noapic' option");
+}
+
+/*
+ *
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ */
+#define PIC_IRQS	(1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+	enable_IO_APIC();
+
+	if (acpi_ioapic)
+		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
+	else
+		io_apic_irqs = ~PIC_IRQS;
+
+	printk("ENABLING IO-APIC IRQs\n");
+
+	/*
+	 * Set up IO-APIC IRQ routing.
+	 */
+	if (!acpi_ioapic)
+		setup_ioapic_ids_from_mpc();
+	sync_Arb_IDs();
+	setup_IO_APIC_irqs();
+	init_IO_APIC_traps();
+	check_timer();
+	if (!acpi_ioapic)
+		print_IO_APIC();
+}
+
+/*
+ *	Called after all the initialization is done. If we didnt find any
+ *	APIC bugs then we can allow the modify fast path
+ */
+ 
+static int __init io_apic_bug_finalize(void)
+{
+	if(sis_apic_bug == -1)
+		sis_apic_bug = 0;
+	return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+	struct sys_device dev;
+	struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, u32 state)
+{
+	struct IO_APIC_route_entry *entry;
+	struct sysfs_ioapic_data *data;
+	unsigned long flags;
+	int i;
+	
+	data = container_of(dev, struct sysfs_ioapic_data, dev);
+	entry = data->entry;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+	struct IO_APIC_route_entry *entry;
+	struct sysfs_ioapic_data *data;
+	unsigned long flags;
+	union IO_APIC_reg_00 reg_00;
+	int i;
+	
+	data = container_of(dev, struct sysfs_ioapic_data, dev);
+	entry = data->entry;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(dev->id, 0);
+	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+		io_apic_write(dev->id, 0, reg_00.raw);
+	}
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+	set_kset_name("ioapic"),
+	.suspend = ioapic_suspend,
+	.resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+	struct sys_device * dev;
+	int i, size, error = 0;
+
+	error = sysdev_class_register(&ioapic_sysdev_class);
+	if (error)
+		return error;
+
+	for (i = 0; i < nr_ioapics; i++ ) {
+		size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
+			* sizeof(struct IO_APIC_route_entry);
+		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+		if (!mp_ioapic_data[i]) {
+			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+			continue;
+		}
+		memset(mp_ioapic_data[i], 0, size);
+		dev = &mp_ioapic_data[i]->dev;
+		dev->id = i; 
+		dev->cls = &ioapic_sysdev_class;
+		error = sysdev_register(dev);
+		if (error) {
+			kfree(mp_ioapic_data[i]);
+			mp_ioapic_data[i] = NULL;
+			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+			continue;
+		}
+	}
+
+	return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
+
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only 
+	 * supports up to 16 on one shared APIC bus.
+	 * 
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice 
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (check_apicid_used(apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!check_apicid_used(apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	} 
+
+	tmp = apicid_to_cpu_present(apic_id);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id)
+			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+
+	if (!IO_APIC_IRQ(irq)) {
+		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	/*
+	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+	 * Note that we mask (disable) IRQs now -- these get enabled when the
+	 * corresponding device driver registers for this IRQ.
+	 */
+
+	memset(&entry,0,sizeof(entry));
+
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.dest_mode = INT_DEST_MODE;
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.trigger = edge_level;
+	entry.polarity = active_high_low;
+	entry.mask  = 1;
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= 16)
+		add_pin_to_irq(irq, ioapic, pin);
+
+	entry.vector = assign_irq_vector(irq);
+
+	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+		edge_level, active_high_low);
+
+	ioapic_register_intr(irq, entry.vector, edge_level);
+
+	if (!ioapic && (irq < 16))
+		disable_8259A_irq(irq);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return 0;
+}
+
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
new file mode 100644
index 00000000000..8b25160393c
--- /dev/null
+++ b/arch/i386/kernel/ioport.c
@@ -0,0 +1,147 @@
+/*
+ *	linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+{
+	unsigned long mask;
+	unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+	unsigned int low_index = base & (BITS_PER_LONG-1);
+	int length = low_index + extent;
+
+	if (low_index != 0) {
+		mask = (~0UL << low_index);
+		if (length < BITS_PER_LONG)
+			mask &= ~(~0UL << length);
+		if (new_value)
+			*bitmap_base++ |= mask;
+		else
+			*bitmap_base++ &= ~mask;
+		length -= BITS_PER_LONG;
+	}
+
+	mask = (new_value ? ~0UL : 0UL);
+	while (length >= BITS_PER_LONG) {
+		*bitmap_base++ = mask;
+		length -= BITS_PER_LONG;
+	}
+
+	if (length > 0) {
+		mask = ~(~0UL << length);
+		if (new_value)
+			*bitmap_base++ |= mask;
+		else
+			*bitmap_base++ &= ~mask;
+	}
+}
+
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+	unsigned long i, max_long, bytes, bytes_updated;
+	struct thread_struct * t = &current->thread;
+	struct tss_struct * tss;
+	unsigned long *bitmap;
+
+	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+		return -EINVAL;
+	if (turn_on && !capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	/*
+	 * If it's the first ioperm() call in this thread's lifetime, set the
+	 * IO bitmap up. ioperm() is much less timing critical than clone(),
+	 * this is why we delay this operation until now:
+	 */
+	if (!t->io_bitmap_ptr) {
+		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+		if (!bitmap)
+			return -ENOMEM;
+
+		memset(bitmap, 0xff, IO_BITMAP_BYTES);
+		t->io_bitmap_ptr = bitmap;
+	}
+
+	/*
+	 * do it in the per-thread copy and in the TSS ...
+	 *
+	 * Disable preemption via get_cpu() - we must not switch away
+	 * because the ->io_bitmap_max value must match the bitmap
+	 * contents:
+	 */
+	tss = &per_cpu(init_tss, get_cpu());
+
+	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+	/*
+	 * Search for a (possibly new) maximum. This is simple and stupid,
+	 * to keep it obviously correct:
+	 */
+	max_long = 0;
+	for (i = 0; i < IO_BITMAP_LONGS; i++)
+		if (t->io_bitmap_ptr[i] != ~0UL)
+			max_long = i;
+
+	bytes = (max_long + 1) * sizeof(long);
+	bytes_updated = max(bytes, t->io_bitmap_max);
+
+	t->io_bitmap_max = bytes;
+
+	/*
+	 * Sets the lazy trigger so that the next I/O operation will
+	 * reload the correct bitmap.
+	 */
+	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+
+	put_cpu();
+
+	return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+
+asmlinkage long sys_iopl(unsigned long unused)
+{
+	volatile struct pt_regs * regs = (struct pt_regs *) &unused;
+	unsigned int level = regs->ebx;
+	unsigned int old = (regs->eflags >> 12) & 3;
+
+	if (level > 3)
+		return -EINVAL;
+	/* Trying to gain more privileges? */
+	if (level > old) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+	}
+	regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
+	/* Make sure we return the long way (not sysenter) */
+	set_thread_flag(TIF_IRET);
+	return 0;
+}
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
new file mode 100644
index 00000000000..73945a3c53c
--- /dev/null
+++ b/arch/i386/kernel/irq.c
@@ -0,0 +1,261 @@
+/*
+ *	linux/arch/i386/kernel/irq.c
+ *
+ *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+	struct thread_info      tinfo;
+	u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{	
+	/* high bits used in ret_from_ code */
+	int irq = regs->orig_eax & 0xff;
+#ifdef CONFIG_4KSTACKS
+	union irq_ctx *curctx, *irqctx;
+	u32 *isp;
+#endif
+
+	irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	{
+		long esp;
+
+		__asm__ __volatile__("andl %%esp,%0" :
+					"=r" (esp) : "0" (THREAD_SIZE - 1));
+		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+			printk("do_IRQ: stack overflow: %ld\n",
+				esp - sizeof(struct thread_info));
+			dump_stack();
+		}
+	}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+	curctx = (union irq_ctx *) current_thread_info();
+	irqctx = hardirq_ctx[smp_processor_id()];
+
+	/*
+	 * this is where we switch to the IRQ stack. However, if we are
+	 * already using the IRQ stack (because we interrupted a hardirq
+	 * handler) we can't do that and just have to keep using the
+	 * current stack (which is the irq stack already after all)
+	 */
+	if (curctx != irqctx) {
+		int arg1, arg2, ebx;
+
+		/* build the stack frame on the IRQ stack */
+		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+		irqctx->tinfo.task = curctx->tinfo.task;
+		irqctx->tinfo.previous_esp = current_stack_pointer;
+
+		asm volatile(
+			"       xchgl   %%ebx,%%esp      \n"
+			"       call    __do_IRQ         \n"
+			"       movl   %%ebx,%%esp      \n"
+			: "=a" (arg1), "=d" (arg2), "=b" (ebx)
+			:  "0" (irq),   "1" (regs),  "2" (isp)
+			: "memory", "cc", "ecx"
+		);
+	} else
+#endif
+		__do_IRQ(irq, regs);
+
+	irq_exit();
+
+	return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+		__attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+		__attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+	union irq_ctx *irqctx;
+
+	if (hardirq_ctx[cpu])
+		return;
+
+	irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	hardirq_ctx[cpu] = irqctx;
+
+	irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	softirq_ctx[cpu] = irqctx;
+
+	printk("CPU %u irqstacks, hard=%p soft=%p\n",
+		cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+	unsigned long flags;
+	struct thread_info *curctx;
+	union irq_ctx *irqctx;
+	u32 *isp;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	if (local_softirq_pending()) {
+		curctx = current_thread_info();
+		irqctx = softirq_ctx[smp_processor_id()];
+		irqctx->tinfo.task = curctx->task;
+		irqctx->tinfo.previous_esp = current_stack_pointer;
+
+		/* build the stack frame on the softirq stack */
+		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+		asm volatile(
+			"       xchgl   %%ebx,%%esp     \n"
+			"       call    __do_softirq    \n"
+			"       movl    %%ebx,%%esp     \n"
+			: "=b"(isp)
+			: "0"(isp)
+			: "memory", "cc", "edx", "ecx", "eax"
+		);
+	}
+
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v, j;
+	struct irqaction * action;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for (j=0; j<NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%d       ",j);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		action = irq_desc[i].action;
+		if (!action)
+			goto skip;
+		seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+		seq_printf(p, " %14s", irq_desc[i].handler->typename);
+		seq_printf(p, "  %s", action->name);
+
+		for (action=action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+
+		seq_putc(p, '\n');
+skip:
+		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+	} else if (i == NR_IRQS) {
+		seq_printf(p, "NMI: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", nmi_count(j));
+		seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+		seq_printf(p, "LOC: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ",
+					per_cpu(irq_stat,j).apic_timer_irqs);
+		seq_putc(p, '\n');
+#endif
+		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	}
+	return 0;
+}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
new file mode 100644
index 00000000000..67168165924
--- /dev/null
+++ b/arch/i386/kernel/kprobes.c
@@ -0,0 +1,385 @@
+/*
+ *  Kernel Probes (KProbes)
+ *  arch/i386/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes contributions from
+ *		Rusty Russell).
+ * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ *		interface to access function arguments.
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/preempt.h>
+#include <asm/kdebug.h>
+#include <asm/desc.h>
+
+/* kprobe_status settings */
+#define KPROBE_HIT_ACTIVE	0x00000001
+#define KPROBE_HIT_SS		0x00000002
+
+static struct kprobe *current_kprobe;
+static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
+static struct pt_regs jprobe_saved_regs;
+static long *jprobe_saved_esp;
+/* copy of the kernel stack at the probe fire time */
+static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+void jprobe_return_end(void);
+
+/*
+ * returns non-zero if opcode modifies the interrupt flag.
+ */
+static inline int is_IF_modifier(kprobe_opcode_t opcode)
+{
+	switch (opcode) {
+	case 0xfa:		/* cli */
+	case 0xfb:		/* sti */
+	case 0xcf:		/* iret/iretd */
+	case 0x9d:		/* popf/popfd */
+		return 1;
+	}
+	return 0;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	return 0;
+}
+
+void arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+}
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
+{
+	*p->addr = p->opcode;
+	regs->eip = (unsigned long)p->addr;
+}
+
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->eflags |= TF_MASK;
+	regs->eflags &= ~IF_MASK;
+	/*single step inline if the instruction is an int3*/
+	if (p->opcode == BREAKPOINT_INSTRUCTION)
+		regs->eip = (unsigned long)p->addr;
+	else
+		regs->eip = (unsigned long)&p->ainsn.insn;
+}
+
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate and they
+ * remain disabled thorough out this function.
+ */
+static int kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	kprobe_opcode_t *addr = NULL;
+	unsigned long *lp;
+
+	/* We're in an interrupt, but this is clear and BUG()-safe. */
+	preempt_disable();
+	/* Check if the application is using LDT entry for its code segment and
+	 * calculate the address by reading the base address from the LDT entry.
+	 */
+	if ((regs->xcs & 4) && (current->mm)) {
+		lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8)
+					+ (char *) current->mm->context.ldt);
+		addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip -
+						sizeof(kprobe_opcode_t));
+	} else {
+		addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
+	}
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		/* We *are* holding lock here, so this is safe.
+		   Disarm the probe we just hit, and ignore it. */
+		p = get_kprobe(addr);
+		if (p) {
+			if (kprobe_status == KPROBE_HIT_SS) {
+				regs->eflags &= ~TF_MASK;
+				regs->eflags |= kprobe_saved_eflags;
+				unlock_kprobes();
+				goto no_kprobe;
+			}
+			disarm_kprobe(p, regs);
+			ret = 1;
+		} else {
+			p = current_kprobe;
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		/* If it's not ours, can't be delete race, (we hold lock). */
+		goto no_kprobe;
+	}
+
+	lock_kprobes();
+	p = get_kprobe(addr);
+	if (!p) {
+		unlock_kprobes();
+		if (regs->eflags & VM_MASK) {
+			/* We are in virtual-8086 mode. Return 0 */
+			goto no_kprobe;
+		}
+
+		if (*addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	current_kprobe = p;
+	kprobe_saved_eflags = kprobe_old_eflags
+	    = (regs->eflags & (TF_MASK | IF_MASK));
+	if (is_IF_modifier(p->opcode))
+		kprobe_saved_eflags &= ~IF_MASK;
+
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "int 3"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ *
+ * This function prepares to return from the post-single-step
+ * interrupt.  We have to fix up the stack as follows:
+ *
+ * 0) Except in the case of absolute or indirect jump or call instructions,
+ * the new eip is relative to the copied instruction.  We need to make
+ * it relative to the original instruction.
+ *
+ * 1) If the single-stepped instruction was pushfl, then the TF and IF
+ * flags are set in the just-pushed eflags, and may need to be cleared.
+ *
+ * 2) If the single-stepped instruction was a call, the return address
+ * that is atop the stack is the address following the copied instruction.
+ * We need to make it the address following the original instruction.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long *tos = (unsigned long *)&regs->esp;
+	unsigned long next_eip = 0;
+	unsigned long copy_eip = (unsigned long)&p->ainsn.insn;
+	unsigned long orig_eip = (unsigned long)p->addr;
+
+	switch (p->ainsn.insn[0]) {
+	case 0x9c:		/* pushfl */
+		*tos &= ~(TF_MASK | IF_MASK);
+		*tos |= kprobe_old_eflags;
+		break;
+	case 0xe8:		/* call relative - Fix return addr */
+		*tos = orig_eip + (*tos - copy_eip);
+		break;
+	case 0xff:
+		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
+			/* call absolute, indirect */
+			/* Fix return addr; eip is correct. */
+			next_eip = regs->eip;
+			*tos = orig_eip + (*tos - copy_eip);
+		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
+			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
+			/* eip is correct. */
+			next_eip = regs->eip;
+		}
+		break;
+	case 0xea:		/* jmp absolute -- eip is correct */
+		next_eip = regs->eip;
+		break;
+	default:
+		break;
+	}
+
+	regs->eflags &= ~TF_MASK;
+	if (next_eip) {
+		regs->eip = next_eip;
+	} else {
+		regs->eip = orig_eip + (regs->eip - copy_eip);
+	}
+}
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled thoroughout this function.  And we hold kprobe lock.
+ */
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+	if (!kprobe_running())
+		return 0;
+
+	if (current_kprobe->post_handler)
+		current_kprobe->post_handler(current_kprobe, regs, 0);
+
+	resume_execution(current_kprobe, regs);
+	regs->eflags |= kprobe_saved_eflags;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, eflags
+	 * will have TF set, in which case, continue the remaining processing
+	 * of do_debug, as if this is not a probe hit.
+	 */
+	if (regs->eflags & TF_MASK)
+		return 0;
+
+	return 1;
+}
+
+/* Interrupts disabled, kprobe_lock held. */
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	if (current_kprobe->fault_handler
+	    && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+		return 1;
+
+	if (kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(current_kprobe, regs);
+		regs->eflags |= kprobe_old_eflags;
+
+		unlock_kprobes();
+		preempt_enable_no_resched();
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	switch (val) {
+	case DIE_INT3:
+		if (kprobe_handler(args->regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_DEBUG:
+		if (post_kprobe_handler(args->regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_GPF:
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			return NOTIFY_STOP;
+		break;
+	case DIE_PAGE_FAULT:
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			return NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr;
+
+	jprobe_saved_regs = *regs;
+	jprobe_saved_esp = &regs->esp;
+	addr = (unsigned long)jprobe_saved_esp;
+
+	/*
+	 * TBD: As Linus pointed out, gcc assumes that the callee
+	 * owns the argument space and could overwrite it, e.g.
+	 * tailcall optimization. So, to be absolutely safe
+	 * we also save and restore enough stack bytes to cover
+	 * the argument area.
+	 */
+	memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
+	regs->eflags &= ~IF_MASK;
+	regs->eip = (unsigned long)(jp->entry);
+	return 1;
+}
+
+void jprobe_return(void)
+{
+	preempt_enable_no_resched();
+	asm volatile ("       xchgl   %%ebx,%%esp     \n"
+		      "       int3			\n"
+		      "       .globl jprobe_return_end	\n"
+		      "       jprobe_return_end:	\n"
+		      "       nop			\n"::"b"
+		      (jprobe_saved_esp):"memory");
+}
+
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	u8 *addr = (u8 *) (regs->eip - 1);
+	unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
+		if (&regs->esp != jprobe_saved_esp) {
+			struct pt_regs *saved_regs =
+			    container_of(jprobe_saved_esp, struct pt_regs, esp);
+			printk("current esp %p does not match saved esp %p\n",
+			       &regs->esp, jprobe_saved_esp);
+			printk("Saved registers for jprobe %p\n", jp);
+			show_registers(saved_regs);
+			printk("Current registers\n");
+			show_registers(regs);
+			BUG();
+		}
+		*regs = jprobe_saved_regs;
+		memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
+		       MIN_STACK_SIZE(stack_addr));
+		return 1;
+	}
+	return 0;
+}
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
new file mode 100644
index 00000000000..bb50afbee92
--- /dev/null
+++ b/arch/i386/kernel/ldt.c
@@ -0,0 +1,255 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+static void flush_ldt(void *null)
+{
+	if (current->active_mm)
+		load_LDT(&current->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+	void *oldldt;
+	void *newldt;
+	int oldsize;
+
+	if (mincount <= pc->size)
+		return 0;
+	oldsize = pc->size;
+	mincount = (mincount+511)&(~511);
+	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+	else
+		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+	if (!newldt)
+		return -ENOMEM;
+
+	if (oldsize)
+		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+	oldldt = pc->ldt;
+	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+	pc->ldt = newldt;
+	wmb();
+	pc->size = mincount;
+	wmb();
+
+	if (reload) {
+#ifdef CONFIG_SMP
+		cpumask_t mask;
+		preempt_disable();
+		load_LDT(pc);
+		mask = cpumask_of_cpu(smp_processor_id());
+		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+			smp_call_function(flush_ldt, NULL, 1, 1);
+		preempt_enable();
+#else
+		load_LDT(pc);
+#endif
+	}
+	if (oldsize) {
+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(oldldt);
+		else
+			kfree(oldldt);
+	}
+	return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+	int err = alloc_ldt(new, old->size, 0);
+	if (err < 0)
+		return err;
+	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct mm_struct * old_mm;
+	int retval = 0;
+
+	init_MUTEX(&mm->context.sem);
+	mm->context.size = 0;
+	old_mm = current->mm;
+	if (old_mm && old_mm->context.size > 0) {
+		down(&old_mm->context.sem);
+		retval = copy_ldt(&mm->context, &old_mm->context);
+		up(&old_mm->context.sem);
+	}
+	return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void destroy_context(struct mm_struct *mm)
+{
+	if (mm->context.size) {
+		if (mm == current->active_mm)
+			clear_LDT();
+		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(mm->context.ldt);
+		else
+			kfree(mm->context.ldt);
+		mm->context.size = 0;
+	}
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+	int err;
+	unsigned long size;
+	struct mm_struct * mm = current->mm;
+
+	if (!mm->context.size)
+		return 0;
+	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+	down(&mm->context.sem);
+	size = mm->context.size*LDT_ENTRY_SIZE;
+	if (size > bytecount)
+		size = bytecount;
+
+	err = 0;
+	if (copy_to_user(ptr, mm->context.ldt, size))
+		err = -EFAULT;
+	up(&mm->context.sem);
+	if (err < 0)
+		goto error_return;
+	if (size != bytecount) {
+		/* zero-fill the rest */
+		if (clear_user(ptr+size, bytecount-size) != 0) {
+			err = -EFAULT;
+			goto error_return;
+		}
+	}
+	return bytecount;
+error_return:
+	return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+	int err;
+	unsigned long size;
+	void *address;
+
+	err = 0;
+	address = &default_ldt[0];
+	size = 5*sizeof(struct desc_struct);
+	if (size > bytecount)
+		size = bytecount;
+
+	err = size;
+	if (copy_to_user(ptr, address, size))
+		err = -EFAULT;
+
+	return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+	struct mm_struct * mm = current->mm;
+	__u32 entry_1, entry_2, *lp;
+	int error;
+	struct user_desc ldt_info;
+
+	error = -EINVAL;
+	if (bytecount != sizeof(ldt_info))
+		goto out;
+	error = -EFAULT; 	
+	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+		goto out;
+
+	error = -EINVAL;
+	if (ldt_info.entry_number >= LDT_ENTRIES)
+		goto out;
+	if (ldt_info.contents == 3) {
+		if (oldmode)
+			goto out;
+		if (ldt_info.seg_not_present == 0)
+			goto out;
+	}
+
+	down(&mm->context.sem);
+	if (ldt_info.entry_number >= mm->context.size) {
+		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+		if (error < 0)
+			goto out_unlock;
+	}
+
+	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+
+   	/* Allow LDTs to be cleared by the user. */
+   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+		if (oldmode || LDT_empty(&ldt_info)) {
+			entry_1 = 0;
+			entry_2 = 0;
+			goto install;
+		}
+	}
+
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
+
+	/* Install the new entry ...  */
+install:
+	*lp	= entry_1;
+	*(lp+1)	= entry_2;
+	error = 0;
+
+out_unlock:
+	up(&mm->context.sem);
+out:
+	return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+	int ret = -ENOSYS;
+
+	switch (func) {
+	case 0:
+		ret = read_ldt(ptr, bytecount);
+		break;
+	case 1:
+		ret = write_ldt(ptr, bytecount, 1);
+		break;
+	case 2:
+		ret = read_default_ldt(ptr, bytecount);
+		break;
+	case 0x11:
+		ret = write_ldt(ptr, bytecount, 0);
+		break;
+	}
+	return ret;
+}
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
new file mode 100644
index 00000000000..8600faeea29
--- /dev/null
+++ b/arch/i386/kernel/mca.c
@@ -0,0 +1,474 @@
+/*
+ *  linux/arch/i386/kernel/mca.c
+ *  Written by Martin Kolinek, February 1996
+ *
+ * Changes:
+ *
+ *	Chris Beauregard July 28th, 1996
+ *	- Fixed up integrated SCSI detection
+ *
+ *	Chris Beauregard August 3rd, 1996
+ *	- Made mca_info local
+ *	- Made integrated registers accessible through standard function calls
+ *	- Added name field
+ *	- More sanity checking
+ *
+ *	Chris Beauregard August 9th, 1996
+ *	- Rewrote /proc/mca
+ *
+ *	Chris Beauregard January 7th, 1997
+ *	- Added basic NMI-processing
+ *	- Added more information to mca_info structure
+ *
+ *	David Weinehall October 12th, 1998
+ *	- Made a lot of cleaning up in the source
+ *	- Added use of save_flags / restore_flags
+ *	- Added the 'driver_loaded' flag in MCA_adapter
+ *	- Added an alternative implemention of ZP Gu's mca_find_unused_adapter
+ *
+ *	David Weinehall March 24th, 1999
+ *	- Fixed the output of 'Driver Installed' in /proc/mca/pos
+ *	- Made the Integrated Video & SCSI show up even if they have id 0000
+ *
+ *	Alexander Viro November 9th, 1999
+ *	- Switched to regular procfs methods
+ *
+ *	Alfred Arnold & David Weinehall August 23rd, 2000
+ *	- Added support for Planar POS-registers
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mca.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/proc_fs.h>
+#include <linux/mman.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/ioport.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <asm/arch_hooks.h>
+
+static unsigned char which_scsi = 0;
+
+int MCA_bus = 0;
+EXPORT_SYMBOL(MCA_bus);
+
+/*
+ * Motherboard register spinlock. Untested on SMP at the moment, but
+ * are there any MCA SMP boxes?
+ *
+ * Yes - Alan
+ */
+static DEFINE_SPINLOCK(mca_lock);
+
+/* Build the status info for the adapter */
+
+static void mca_configure_adapter_status(struct mca_device *mca_dev) {
+	mca_dev->status = MCA_ADAPTER_NONE;
+
+	mca_dev->pos_id = mca_dev->pos[0]
+		+ (mca_dev->pos[1] << 8);
+
+	if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
+
+		/* id = 0x0000 usually indicates hardware failure,
+		 * however, ZP Gu (zpg@castle.net> reports that his 9556
+		 * has 0x0000 as id and everything still works. There
+		 * also seem to be an adapter with id = 0x0000; the
+		 * NCR Parallel Bus Memory Card. Until this is confirmed,
+		 * however, this code will stay.
+		 */
+
+		mca_dev->status = MCA_ADAPTER_ERROR;
+
+		return;
+	} else if(mca_dev->pos_id != 0xffff) {
+
+		/* 0xffff usually indicates that there's no adapter,
+		 * however, some integrated adapters may have 0xffff as
+		 * their id and still be valid. Examples are on-board
+		 * VGA of the 55sx, the integrated SCSI of the 56 & 57,
+		 * and possibly also the 95 ULTIMEDIA.
+		 */
+
+		mca_dev->status = MCA_ADAPTER_NORMAL;
+	}
+
+	if((mca_dev->pos_id == 0xffff ||
+	    mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
+		int j;
+
+		for(j = 2; j < 8; j++) {
+			if(mca_dev->pos[j] != 0xff) {
+				mca_dev->status = MCA_ADAPTER_NORMAL;
+				break;
+			}
+		}
+	}
+
+	if(!(mca_dev->pos[2] & MCA_ENABLED)) {
+
+		/* enabled bit is in POS 2 */
+
+		mca_dev->status = MCA_ADAPTER_DISABLED;
+	}
+} /* mca_configure_adapter_status */
+
+/*--------------------------------------------------------------------*/
+
+static struct resource mca_standard_resources[] = {
+	{ .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" },
+	{ .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" },
+	{ .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" },
+	{ .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" },
+	{ .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" },
+	{ .start = 0x96, .end = 0x97, .name = "POS (MCA)" },
+	{ .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
+};
+
+#define MCA_STANDARD_RESOURCES	(sizeof(mca_standard_resources)/sizeof(struct resource))
+
+/**
+ *	mca_read_and_store_pos - read the POS registers into a memory buffer
+ *      @pos: a char pointer to 8 bytes, contains the POS register value on
+ *            successful return
+ *
+ *	Returns 1 if a card actually exists (i.e. the pos isn't
+ *	all 0xff) or 0 otherwise
+ */
+static int mca_read_and_store_pos(unsigned char *pos) {
+	int j;
+	int found = 0;
+
+	for(j=0; j<8; j++) {
+		if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) {
+			/* 0xff all across means no device. 0x00 means
+			 * something's broken, but a device is
+			 * probably there.  However, if you get 0x00
+			 * from a motherboard register it won't matter
+			 * what we find.  For the record, on the
+			 * 57SLC, the integrated SCSI adapter has
+			 * 0xffff for the adapter ID, but nonzero for
+			 * other registers.  */
+
+			found = 1;
+		}
+	}
+	return found;
+}
+
+static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
+{
+	unsigned char byte;
+	unsigned long flags;
+
+	if(reg < 0 || reg >= 8)
+		return 0;
+
+	spin_lock_irqsave(&mca_lock, flags);
+	if(mca_dev->pos_register) {
+		/* Disable adapter setup, enable motherboard setup */
+
+		outb_p(0, MCA_ADAPTER_SETUP_REG);
+		outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
+
+		byte = inb_p(MCA_POS_REG(reg));
+		outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
+	} else {
+
+		/* Make sure motherboard setup is off */
+
+		outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
+
+		/* Read the appropriate register */
+
+		outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG);
+		byte = inb_p(MCA_POS_REG(reg));
+		outb_p(0, MCA_ADAPTER_SETUP_REG);
+	}
+	spin_unlock_irqrestore(&mca_lock, flags);
+
+	mca_dev->pos[reg] = byte;
+
+	return byte;
+}
+
+static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
+			     unsigned char byte)
+{
+	unsigned long flags;
+
+	if(reg < 0 || reg >= 8)
+		return;
+
+	spin_lock_irqsave(&mca_lock, flags);
+
+	/* Make sure motherboard setup is off */
+
+	outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
+
+	/* Read in the appropriate register */
+
+	outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG);
+	outb_p(byte, MCA_POS_REG(reg));
+	outb_p(0, MCA_ADAPTER_SETUP_REG);
+
+	spin_unlock_irqrestore(&mca_lock, flags);
+
+	/* Update the global register list, while we have the byte */
+
+	mca_dev->pos[reg] = byte;
+
+}
+
+/* for the primary MCA bus, we have identity transforms */
+static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq)
+{
+	return irq;
+}
+
+static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port)
+{
+	return port;
+}
+
+static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem)
+{
+	return mem;
+}
+
+
+static int __init mca_init(void)
+{
+	unsigned int i, j;
+	struct mca_device *mca_dev;
+	unsigned char pos[8];
+	short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
+	struct mca_bus *bus;
+
+	/* WARNING: Be careful when making changes here. Putting an adapter
+	 * and the motherboard simultaneously into setup mode may result in
+	 * damage to chips (according to The Indispensible PC Hardware Book
+	 * by Hans-Peter Messmer). Also, we disable system interrupts (so
+	 * that we are not disturbed in the middle of this).
+	 */
+
+	/* Make sure the MCA bus is present */
+
+	if (mca_system_init()) {
+		printk(KERN_ERR "MCA bus system initialisation failed\n");
+		return -ENODEV;
+	}
+
+	if (!MCA_bus)
+		return -ENODEV;
+
+	printk(KERN_INFO "Micro Channel bus detected.\n");
+
+	/* All MCA systems have at least a primary bus */
+	bus = mca_attach_bus(MCA_PRIMARY_BUS);
+	if (!bus)
+		goto out_nomem;
+	bus->default_dma_mask = 0xffffffffLL;
+	bus->f.mca_write_pos = mca_pc_write_pos;
+	bus->f.mca_read_pos = mca_pc_read_pos;
+	bus->f.mca_transform_irq = mca_dummy_transform_irq;
+	bus->f.mca_transform_ioport = mca_dummy_transform_ioport;
+	bus->f.mca_transform_memory = mca_dummy_transform_memory;
+
+	/* get the motherboard device */
+	mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL);
+	if(unlikely(!mca_dev))
+		goto out_nomem;
+	memset(mca_dev, 0, sizeof(struct mca_device));
+
+	/*
+	 * We do not expect many MCA interrupts during initialization,
+	 * but let us be safe:
+	 */
+	spin_lock_irq(&mca_lock);
+
+	/* Make sure adapter setup is off */
+
+	outb_p(0, MCA_ADAPTER_SETUP_REG);
+
+	/* Read motherboard POS registers */
+
+	mca_dev->pos_register = 0x7f;
+	outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
+	mca_dev->name[0] = 0;
+	mca_read_and_store_pos(mca_dev->pos);
+	mca_configure_adapter_status(mca_dev);
+	/* fake POS and slot for a motherboard */
+	mca_dev->pos_id = MCA_MOTHERBOARD_POS;
+	mca_dev->slot = MCA_MOTHERBOARD;
+	mca_register_device(MCA_PRIMARY_BUS, mca_dev);
+
+	mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+	if(unlikely(!mca_dev))
+		goto out_unlock_nomem;
+	memset(mca_dev, 0, sizeof(struct mca_device));
+
+
+	/* Put motherboard into video setup mode, read integrated video
+	 * POS registers, and turn motherboard setup off.
+	 */
+
+	mca_dev->pos_register = 0xdf;
+	outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
+	mca_dev->name[0] = 0;
+	mca_read_and_store_pos(mca_dev->pos);
+	mca_configure_adapter_status(mca_dev);
+	/* fake POS and slot for the integrated video */
+	mca_dev->pos_id = MCA_INTEGVIDEO_POS;
+	mca_dev->slot = MCA_INTEGVIDEO;
+	mca_register_device(MCA_PRIMARY_BUS, mca_dev);
+
+	/* Put motherboard into scsi setup mode, read integrated scsi
+	 * POS registers, and turn motherboard setup off.
+	 *
+	 * It seems there are two possible SCSI registers. Martin says that
+	 * for the 56,57, 0xf7 is the one, but fails on the 76.
+	 * Alfredo (apena@vnet.ibm.com) says
+	 * 0xfd works on his machine. We'll try both of them. I figure it's
+	 * a good bet that only one could be valid at a time. This could
+	 * screw up though if one is used for something else on the other
+	 * machine.
+	 */
+
+	for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
+		outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
+		if(mca_read_and_store_pos(pos))
+			break;
+	}
+	if(which_scsi) {
+		/* found a scsi card */
+		mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+		if(unlikely(!mca_dev))
+			goto out_unlock_nomem;
+		memset(mca_dev, 0, sizeof(struct mca_device));
+
+		for(j = 0; j < 8; j++)
+			mca_dev->pos[j] = pos[j];
+
+		mca_configure_adapter_status(mca_dev);
+		/* fake POS and slot for integrated SCSI controller */
+		mca_dev->pos_id = MCA_INTEGSCSI_POS;
+		mca_dev->slot = MCA_INTEGSCSI;
+		mca_dev->pos_register = which_scsi;
+		mca_register_device(MCA_PRIMARY_BUS, mca_dev);
+	}
+
+	/* Turn off motherboard setup */
+
+	outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
+
+	/* Now loop over MCA slots: put each adapter into setup mode, and
+	 * read its POS registers. Then put adapter setup off.
+	 */
+
+	for(i=0; i<MCA_MAX_SLOT_NR; i++) {
+		outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
+		if(!mca_read_and_store_pos(pos))
+			continue;
+
+		mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+		if(unlikely(!mca_dev))
+			goto out_unlock_nomem;
+		memset(mca_dev, 0, sizeof(struct mca_device));
+
+		for(j=0; j<8; j++)
+			mca_dev->pos[j]=pos[j];
+
+		mca_dev->driver_loaded = 0;
+		mca_dev->slot = i;
+		mca_dev->pos_register = 0;
+		mca_configure_adapter_status(mca_dev);
+		mca_register_device(MCA_PRIMARY_BUS, mca_dev);
+	}
+	outb_p(0, MCA_ADAPTER_SETUP_REG);
+
+	/* Enable interrupts and return memory start */
+	spin_unlock_irq(&mca_lock);
+
+	for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
+		request_resource(&ioport_resource, mca_standard_resources + i);
+
+	mca_do_proc_init();
+
+	return 0;
+
+ out_unlock_nomem:
+	spin_unlock_irq(&mca_lock);
+ out_nomem:
+	printk(KERN_EMERG "Failed memory allocation in MCA setup!\n");
+	return -ENOMEM;
+}
+
+subsys_initcall(mca_init);
+
+/*--------------------------------------------------------------------*/
+
+static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
+{
+	int slot = mca_dev->slot;
+
+	if(slot == MCA_INTEGSCSI) {
+		printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
+			mca_dev->name);
+	} else if(slot == MCA_INTEGVIDEO) {
+		printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
+			mca_dev->name);
+	} else if(slot == MCA_MOTHERBOARD) {
+		printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
+			mca_dev->name);
+	}
+
+	/* More info available in POS 6 and 7? */
+
+	if(check_flag) {
+		unsigned char pos6, pos7;
+
+		pos6 = mca_device_read_pos(mca_dev, 6);
+		pos7 = mca_device_read_pos(mca_dev, 7);
+
+		printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7);
+	}
+
+} /* mca_handle_nmi_slot */
+
+/*--------------------------------------------------------------------*/
+
+static int mca_handle_nmi_callback(struct device *dev, void *data)
+{
+	struct mca_device *mca_dev = to_mca_device(dev);
+	unsigned char pos5;
+
+	pos5 = mca_device_read_pos(mca_dev, 5);
+
+	if(!(pos5 & 0x80)) {
+		/* Bit 7 of POS 5 is reset when this adapter has a hardware
+		 * error. Bit 7 it reset if there's error information
+		 * available in POS 6 and 7.
+		 */
+		mca_handle_nmi_device(mca_dev, !(pos5 & 0x40));
+		return 1;
+	}
+	return 0;
+}
+
+void mca_handle_nmi(void)
+{
+	/* First try - scan the various adapters and see if a specific
+	 * adapter was responsible for the error.
+	 */
+	bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
+
+	mca_nmi_hook();
+} /* mca_handle_nmi */
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
new file mode 100644
index 00000000000..a77c612aad0
--- /dev/null
+++ b/arch/i386/kernel/microcode.c
@@ -0,0 +1,512 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2000-2004 Tigran Aivazian
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II, 
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 
+ *	Order Number 245472 or free download from:
+ *		
+ *	http://developer.intel.com/design/pentium4/manuals/245472.htm
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Initial release.
+ *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added read() support + cleanups.
+ *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added 'device trimming' support. open(O_WRONLY) zeroes
+ *		and frees the saved copy of applied microcode.
+ *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Made to use devfs (/dev/cpu/microcode) + cleanups.
+ *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Added misc device support (now uses both devfs and misc).
+ *		Added MICROCODE_IOCFREE ioctl to clear memory.
+ *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Messages for error cases (non Intel & no suitable microcode).
+ *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->release(). Removed exclusive open and status bitmap.
+ *		Added microcode_rwsem to serialize read()/write()/ioctl().
+ *		Removed global kernel lock usage.
+ *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Write 0 to 0x8B msr and then cpuid before reading revision,
+ *		so that it works even if there were no update done by the
+ *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ *		to be 0 on my machine which is why it worked even when I
+ *		disabled update by the BIOS)
+ *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ *			     Tigran Aivazian <tigran@veritas.com>
+ *		Intel Pentium 4 processor support and bugfixes.
+ *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ *		Bugfix for HT (Hyper-Threading) enabled processors
+ *		whereby processor resources are shared by all logical processors
+ *		in a single CPU package.
+ *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ *		Tigran Aivazian <tigran@veritas.com>,
+ *		Serialize updates as required on HT processors due to speculative
+ *		nature of implementation.
+ *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ *		Fix the panic when writing zero-length microcode chunk.
+ *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, 
+ *		Jun Nakajima <jun.nakajima@intel.com>
+ *		Support for the microcode updates in the new format.
+ *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ *		because we no longer hold a copy of applied microcode 
+ *		in kernel memory.
+ *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ *		Fix sigmatch() macro to handle old CPUs with pf == 0.
+ *		Thanks to Stuart Swales for pointing out this bug.
+ */
+
+//#define DEBUG /* pr_debug */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
+MODULE_LICENSE("GPL");
+
+#define MICROCODE_VERSION 	"1.14"
+
+#define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
+#define MC_HEADER_SIZE		(sizeof (microcode_header_t))  	  /* 48 bytes */
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
+#define EXT_HEADER_SIZE		(sizeof (struct extended_sigtable)) /* 20 bytes */
+#define EXT_SIGNATURE_SIZE	(sizeof (struct extended_signature)) /* 12 bytes */
+#define DWSIZE			(sizeof (u32))
+#define get_totalsize(mc) \
+	(((microcode_t *)mc)->hdr.totalsize ? \
+	 ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
+#define get_datasize(mc) \
+	(((microcode_t *)mc)->hdr.datasize ? \
+	 ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+
+#define sigmatch(s1, s2, p1, p2) \
+	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
+
+#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
+
+/* serialize access to the physical write to MSR 0x79 */
+static DEFINE_SPINLOCK(microcode_update_lock);
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DECLARE_MUTEX(microcode_sem);
+
+static void __user *user_buffer;	/* user area microcode data buffer */
+static unsigned int user_buffer_size;	/* it's size */
+
+typedef enum mc_error_code {
+	MC_SUCCESS 	= 0,
+	MC_NOTFOUND 	= 1,
+	MC_MARKED 	= 2,
+	MC_ALLOCATED 	= 3,
+} mc_error_code_t;
+
+static struct ucode_cpu_info {
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int rev;
+	unsigned int cksum;
+	mc_error_code_t err;
+	microcode_t *mc;
+} ucode_cpu_info[NR_CPUS];
+				
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+static void collect_cpu_info (void *unused)
+{
+	int cpu_num = smp_processor_id();
+	struct cpuinfo_x86 *c = cpu_data + cpu_num;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+	unsigned int val[2];
+
+	uci->sig = uci->pf = uci->rev = uci->cksum = 0;
+	uci->err = MC_NOTFOUND; 
+	uci->mc = NULL;
+
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    	cpu_has(c, X86_FEATURE_IA64)) {
+		printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
+		return;
+	} else {
+		uci->sig = cpuid_eax(0x00000001);
+
+		if ((c->x86_model >= 5) || (c->x86 > 6)) {
+			/* get processor flags from MSR 0x17 */
+			rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+			uci->pf = 1 << ((val[1] >> 18) & 7);
+		}
+	}
+
+	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+	/* get the current revision from MSR 0x8B */
+	rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
+	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
+			uci->sig, uci->pf, uci->rev);
+}
+
+static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+
+	pr_debug("Microcode Found.\n");
+	pr_debug("   Header Revision 0x%x\n", mc_header->hdrver);
+	pr_debug("   Loader Revision 0x%x\n", mc_header->ldrver);
+	pr_debug("   Revision 0x%x \n", mc_header->rev);
+	pr_debug("   Date %x/%x/%x\n",
+		((mc_header->date >> 24 ) & 0xff),
+		((mc_header->date >> 16 ) & 0xff),
+		(mc_header->date & 0xFFFF));
+	pr_debug("   Signature 0x%x\n", sig);
+	pr_debug("   Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
+		((sig >> 12) & 0x3),
+		((sig >> 8) & 0xf),
+		((sig >> 4) & 0xf),
+		((sig & 0xf)));
+	pr_debug("   Processor Flags 0x%x\n", pf);
+	pr_debug("   Checksum 0x%x\n", cksum);
+
+	if (mc_header->rev < uci->rev) {
+		printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision"
+		       " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
+		goto out;
+	} else if (mc_header->rev == uci->rev) {
+		/* notify the caller of success on this cpu */
+		uci->err = MC_SUCCESS;
+		printk(KERN_ERR "microcode: CPU%d already at revision"
+			" 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
+		goto out;
+	}
+
+	pr_debug("microcode: CPU%d found a matching microcode update with "
+		" revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
+	uci->cksum = cksum;
+	uci->pf = pf; /* keep the original mc pf for cksum calculation */
+	uci->err = MC_MARKED; /* found the match */
+out:
+	return;
+}
+
+static int find_matching_ucodes (void) 
+{
+	int cursor = 0;
+	int error = 0;
+
+	while (cursor + MC_HEADER_SIZE < user_buffer_size) {
+		microcode_header_t mc_header;
+		void *newmc = NULL;
+		int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
+
+		if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
+			printk(KERN_ERR "microcode: error! Can not read user data\n");
+			error = -EFAULT;
+			goto out;
+		}
+
+		total_size = get_totalsize(&mc_header);
+		if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
+			printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+			error = -EINVAL;
+			goto out;
+		}
+
+		data_size = get_datasize(&mc_header);
+		if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
+			printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+			error = -EINVAL;
+			goto out;
+		}
+
+		if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
+			printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
+			error = -EINVAL;
+			goto out;
+		}
+		
+		for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
+			struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+			if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
+				continue;
+
+			if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf))
+				mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
+		}
+
+		ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+		if (ext_table_size) {
+			struct extended_sigtable ext_header;
+			struct extended_signature ext_sig;
+			int ext_sigcount;
+
+			if ((ext_table_size < EXT_HEADER_SIZE) 
+					|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+				printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+				error = -EINVAL;
+				goto out;
+			}
+			if (copy_from_user(&ext_header, user_buffer + cursor 
+					+ MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
+				printk(KERN_ERR "microcode: error! Can not read user data\n");
+				error = -EFAULT;
+				goto out;
+			}
+			if (ext_table_size != exttable_size(&ext_header)) {
+				printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+				error = -EFAULT;
+				goto out;
+			}
+
+			ext_sigcount = ext_header.count;
+			
+			for (i = 0; i < ext_sigcount; i++) {
+				if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE 
+						+ EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
+					printk(KERN_ERR "microcode: error! Can not read user data\n");
+					error = -EFAULT;
+					goto out;
+				}
+				for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
+					struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+					if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
+						continue;
+					if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) {
+						mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
+					}
+				}
+			}
+		}
+		/* now check if any cpu has matched */
+		for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) {
+			if (ucode_cpu_info[cpu_num].err == MC_MARKED) { 
+				struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+				if (!allocated_flag) {
+					allocated_flag = 1;
+					newmc = vmalloc(total_size);
+					if (!newmc) {
+						printk(KERN_ERR "microcode: error! Can not allocate memory\n");
+						error = -ENOMEM;
+						goto out;
+					}
+					if (copy_from_user(newmc + MC_HEADER_SIZE, 
+								user_buffer + cursor + MC_HEADER_SIZE, 
+								total_size - MC_HEADER_SIZE)) {
+						printk(KERN_ERR "microcode: error! Can not read user data\n");
+						vfree(newmc);
+						error = -EFAULT;
+						goto out;
+					}
+					memcpy(newmc, &mc_header, MC_HEADER_SIZE);
+					/* check extended table checksum */
+					if (ext_table_size) {
+						int ext_table_sum = 0;
+						int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
+						i = ext_table_size / DWSIZE;
+						while (i--) ext_table_sum += ext_tablep[i];
+						if (ext_table_sum) {
+							printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
+							vfree(newmc);
+							error = -EINVAL;
+							goto out;
+						}
+					}
+
+					/* calculate the checksum */
+					i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+					while (i--) sum += ((int *)newmc)[i];
+					sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
+				}
+				ucode_cpu_info[cpu_num].mc = newmc;
+				ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
+				if (sum + uci->sig + uci->pf + uci->cksum != 0) {
+					printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
+					error = -EINVAL;
+					goto out;
+				}
+			}
+		}
+		cursor += total_size; /* goto the next update patch */
+	} /* end of while */
+out:
+	return error;
+}
+
+static void do_update_one (void * unused)
+{
+	unsigned long flags;
+	unsigned int val[2];
+	int cpu_num = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+
+	if (uci->mc == NULL) {
+		printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
+		return;
+	}
+
+	/* serialize access to the physical write to MSR 0x79 */
+	spin_lock_irqsave(&microcode_update_lock, flags);          
+
+	/* write microcode via MSR 0x79 */
+	wrmsr(MSR_IA32_UCODE_WRITE,
+		(unsigned long) uci->mc->bits, 
+		(unsigned long) uci->mc->bits >> 16 >> 16);
+	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+	/* get the current revision from MSR 0x8B */
+	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+	/* notify the caller of success on this cpu */
+	uci->err = MC_SUCCESS;
+	spin_unlock_irqrestore(&microcode_update_lock, flags);
+	printk(KERN_INFO "microcode: CPU%d updated from revision "
+	       "0x%x to 0x%x, date = %08x \n", 
+	       cpu_num, uci->rev, val[1], uci->mc->hdr.date);
+	return;
+}
+
+static int do_microcode_update (void)
+{
+	int i, error;
+
+	if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
+		printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
+		error = -EIO;
+		goto out;
+	}
+
+	if ((error = find_matching_ucodes())) {
+		printk(KERN_ERR "microcode: Error in the microcode data\n");
+		goto out_free;
+	}
+
+	if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
+		printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
+		error = -EIO;
+	}
+
+out_free:
+	for (i = 0; i < num_online_cpus(); i++) {
+		if (ucode_cpu_info[i].mc) {
+			int j;
+			void *tmp = ucode_cpu_info[i].mc;
+			vfree(tmp);
+			for (j = i; j < num_online_cpus(); j++) {
+				if (ucode_cpu_info[j].mc == tmp)
+					ucode_cpu_info[j].mc = NULL;
+			}
+		}
+	}
+out:
+	return error;
+}
+
+static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+{
+	ssize_t ret;
+
+	if (len < DEFAULT_UCODE_TOTALSIZE) {
+		printk(KERN_ERR "microcode: not enough data\n"); 
+		return -EINVAL;
+	}
+
+	if ((len >> PAGE_SHIFT) > num_physpages) {
+		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
+		return -EINVAL;
+	}
+
+	down(&microcode_sem);
+
+	user_buffer = (void __user *) buf;
+	user_buffer_size = (int) len;
+
+	ret = do_microcode_update();
+	if (!ret)
+		ret = (ssize_t)len;
+
+	up(&microcode_sem);
+
+	return ret;
+}
+
+static int microcode_ioctl (struct inode *inode, struct file *file, 
+		unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+		/* 
+		 *  XXX: will be removed after microcode_ctl 
+		 *  is updated to ignore failure of this ioctl()
+		 */
+		case MICROCODE_IOCFREE:
+			return 0;
+		default:
+			return -EINVAL;
+	}
+	return -EINVAL;
+}
+
+static struct file_operations microcode_fops = {
+	.owner		= THIS_MODULE,
+	.write		= microcode_write,
+	.ioctl		= microcode_ioctl,
+	.open		= microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+	.minor		= MICROCODE_MINOR,
+	.name		= "microcode",
+	.devfs_name	= "cpu/microcode",
+	.fops		= &microcode_fops,
+};
+
+static int __init microcode_init (void)
+{
+	int error;
+
+	error = misc_register(&microcode_dev);
+	if (error) {
+		printk(KERN_ERR
+			"microcode: can't misc_register on minor=%d\n",
+			MICROCODE_MINOR);
+		return error;
+	}
+
+	printk(KERN_INFO 
+		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
+	return 0;
+}
+
+static void __exit microcode_exit (void)
+{
+	misc_deregister(&microcode_dev);
+	printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n");
+}
+
+module_init(microcode_init)
+module_exit(microcode_exit)
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c
new file mode 100644
index 00000000000..5149c8a621f
--- /dev/null
+++ b/arch/i386/kernel/module.c
@@ -0,0 +1,129 @@
+/*  Kernel module help for i386.
+    Copyright (C) 2001 Rusty Russell.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc_exec(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_386_32:
+			/* We add the value into the location given */
+			*location += sym->st_value;
+			break;
+		case R_386_PC32:
+			/* Add the value, subtract its postition */
+			*location += sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+extern void apply_alternatives(void *start, void *end); 
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	/* look for .altinstructions to patch */ 
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
+		void *seg; 		
+		if (strcmp(".altinstructions", secstrings + s->sh_name))
+			continue;
+		seg = (void *)s->sh_addr; 
+		apply_alternatives(seg, seg + s->sh_size); 
+	} 	
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
new file mode 100644
index 00000000000..1347ab4939e
--- /dev/null
+++ b/arch/i386/kernel/mpparse.c
@@ -0,0 +1,1109 @@
+/*
+ *	Intel Multiprocessor Specification 1.1 and 1.4
+ *	compliant MP-table parsing routines.
+ *
+ *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *		Erich Boleyn	:	MP v1.4 and additional changes.
+ *		Alan Cox	:	Added EBDA scanning
+ *		Ingo Molnar	:	various cleanups and rewrites
+ *		Maciej W. Rozycki:	Bits for default MP configurations
+ *		Paul Diefenbaugh:	Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+static int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __initdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+	int sum = 0;
+
+	while (len--)
+		sum += *mp++;
+
+	return sum & 0xFF;
+}
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record; 
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+#ifdef CONFIG_X86_NUMAQ
+static int MP_valid_apicid(int apicid, int version)
+{
+	return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
+}
+#else
+static int MP_valid_apicid(int apicid, int version)
+{
+	if (version >= 0x14)
+		return apicid < 0xff;
+	else
+		return apicid < 0xf;
+}
+#endif
+
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ 	int ver, apicid;
+	physid_mask_t tmp;
+ 	
+	if (!(m->mpc_cpuflag & CPU_ENABLED))
+		return;
+
+	apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+	if (m->mpc_featureflag&(1<<0))
+		Dprintk("    Floating point unit present.\n");
+	if (m->mpc_featureflag&(1<<7))
+		Dprintk("    Machine Exception supported.\n");
+	if (m->mpc_featureflag&(1<<8))
+		Dprintk("    64 bit compare & exchange supported.\n");
+	if (m->mpc_featureflag&(1<<9))
+		Dprintk("    Internal APIC present.\n");
+	if (m->mpc_featureflag&(1<<11))
+		Dprintk("    SEP present.\n");
+	if (m->mpc_featureflag&(1<<12))
+		Dprintk("    MTRR  present.\n");
+	if (m->mpc_featureflag&(1<<13))
+		Dprintk("    PGE  present.\n");
+	if (m->mpc_featureflag&(1<<14))
+		Dprintk("    MCA  present.\n");
+	if (m->mpc_featureflag&(1<<15))
+		Dprintk("    CMOV  present.\n");
+	if (m->mpc_featureflag&(1<<16))
+		Dprintk("    PAT  present.\n");
+	if (m->mpc_featureflag&(1<<17))
+		Dprintk("    PSE  present.\n");
+	if (m->mpc_featureflag&(1<<18))
+		Dprintk("    PSN  present.\n");
+	if (m->mpc_featureflag&(1<<19))
+		Dprintk("    Cache Line Flush Instruction present.\n");
+	/* 20 Reserved */
+	if (m->mpc_featureflag&(1<<21))
+		Dprintk("    Debug Trace and EMON Store present.\n");
+	if (m->mpc_featureflag&(1<<22))
+		Dprintk("    ACPI Thermal Throttle Registers  present.\n");
+	if (m->mpc_featureflag&(1<<23))
+		Dprintk("    MMX  present.\n");
+	if (m->mpc_featureflag&(1<<24))
+		Dprintk("    FXSR  present.\n");
+	if (m->mpc_featureflag&(1<<25))
+		Dprintk("    XMM  present.\n");
+	if (m->mpc_featureflag&(1<<26))
+		Dprintk("    Willamette New Instructions  present.\n");
+	if (m->mpc_featureflag&(1<<27))
+		Dprintk("    Self Snoop  present.\n");
+	if (m->mpc_featureflag&(1<<28))
+		Dprintk("    HT  present.\n");
+	if (m->mpc_featureflag&(1<<29))
+		Dprintk("    Thermal Monitor present.\n");
+	/* 30, 31 Reserved */
+
+
+	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+		Dprintk("    Bootup CPU\n");
+		boot_cpu_physical_apicid = m->mpc_apicid;
+		boot_cpu_logical_apicid = apicid;
+	}
+
+	if (num_processors >= NR_CPUS) {
+		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+			"  Processor ignored.\n", NR_CPUS); 
+		return;
+	}
+
+	if (num_processors >= maxcpus) {
+		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+			" Processor ignored.\n", maxcpus); 
+		return;
+	}
+	num_processors++;
+	ver = m->mpc_apicver;
+
+	if (!MP_valid_apicid(apicid, ver)) {
+		printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
+			m->mpc_apicid, MAX_APICS);
+		--num_processors;
+		return;
+	}
+
+	tmp = apicid_to_cpu_present(apicid);
+	physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
+	
+	/*
+	 * Validate version
+	 */
+	if (ver == 0x0) {
+		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+		ver = 0x10;
+	}
+	apic_version[m->mpc_apicid] = ver;
+	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+	char str[7];
+
+	memcpy(str, m->mpc_bustype, 6);
+	str[6] = 0;
+
+	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+		mpc_oem_pci_bus(m, translation_table[mpc_record]);
+		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+		mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+		mp_current_pci_id++;
+	} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+	} else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+	} else {
+		printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+	}
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+	if (!(m->mpc_flags & MPC_APIC_USABLE))
+		return;
+
+	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+		m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+	if (nr_ioapics >= MAX_IO_APICS) {
+		printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+			MAX_IO_APICS, nr_ioapics);
+		panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+	}
+	if (!m->mpc_apicaddr) {
+		printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+			" found in MP table, skipping!\n");
+		return;
+	}
+	mp_ioapics[nr_ioapics] = *m;
+	nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+	mp_irqs [mp_irq_entries] = *m;
+	Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
+			m->mpc_irqtype, m->mpc_irqflag & 3,
+			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+	Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+		" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+			m->mpc_irqtype, m->mpc_irqflag & 3,
+			(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+			m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+	/*
+	 * Well it seems all SMP boards in existence
+	 * use ExtINT/LVT1 == LINT0 and
+	 * NMI/LVT2 == LINT1 - the following check
+	 * will show us if this assumptions is false.
+	 * Until then we do not have to add baggage.
+	 */
+	if ((m->mpc_irqtype == mp_ExtINT) &&
+		(m->mpc_destapiclint != 0))
+			BUG();
+	if ((m->mpc_irqtype == mp_NMI) &&
+		(m->mpc_destapiclint != 1))
+			BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+	printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+	if (mpc_record >= MAX_MPC_ENTRY) 
+		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+	else
+		translation_table[mpc_record] = m; /* stash this for later */
+	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+		node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+	unsigned short oemsize)
+{
+	int count = sizeof (*oemtable); /* the header size */
+	unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+	
+	mpc_record = 0;
+	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+	if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+	{
+		printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+			oemtable->oem_signature[0],
+			oemtable->oem_signature[1],
+			oemtable->oem_signature[2],
+			oemtable->oem_signature[3]);
+		return;
+	}
+	if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+	{
+		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+		return;
+	}
+	while (count < oemtable->oem_length) {
+		switch (*oemptr) {
+			case MP_TRANSLATION:
+			{
+				struct mpc_config_translation *m=
+					(struct mpc_config_translation *)oemptr;
+				MP_translation_info(m);
+				oemptr += sizeof(*m);
+				count += sizeof(*m);
+				++mpc_record;
+				break;
+			}
+			default:
+			{
+				printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+				return;
+			}
+		}
+       }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+		char *productid)
+{
+	if (strncmp(oem, "IBM NUMA", 8))
+		printk("Warning!  May not be a NUMA-Q system!\n");
+	if (mpc->mpc_oemptr)
+		smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+				mpc->mpc_oemsize);
+}
+#endif	/* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+	char str[16];
+	char oem[10];
+	int count=sizeof(*mpc);
+	unsigned char *mpt=((unsigned char *)mpc)+count;
+
+	if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+		printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+			*(u32 *)mpc->mpc_signature);
+		return 0;
+	}
+	if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+		printk(KERN_ERR "SMP mptable: checksum error!\n");
+		return 0;
+	}
+	if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+		printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+			mpc->mpc_spec);
+		return 0;
+	}
+	if (!mpc->mpc_lapic) {
+		printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+		return 0;
+	}
+	memcpy(oem,mpc->mpc_oem,8);
+	oem[8]=0;
+	printk(KERN_INFO "OEM ID: %s ",oem);
+
+	memcpy(str,mpc->mpc_productid,12);
+	str[12]=0;
+	printk("Product ID: %s ",str);
+
+	mps_oem_check(mpc, oem, str);
+
+	printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+	/* 
+	 * Save the local APIC address (it might be non-default) -- but only
+	 * if we're not using ACPI.
+	 */
+	if (!acpi_lapic)
+		mp_lapic_addr = mpc->mpc_lapic;
+
+	/*
+	 *	Now process the configuration blocks.
+	 */
+	mpc_record = 0;
+	while (count < mpc->mpc_length) {
+		switch(*mpt) {
+			case MP_PROCESSOR:
+			{
+				struct mpc_config_processor *m=
+					(struct mpc_config_processor *)mpt;
+				/* ACPI may have already provided this data */
+				if (!acpi_lapic)
+					MP_processor_info(m);
+				mpt += sizeof(*m);
+				count += sizeof(*m);
+				break;
+			}
+			case MP_BUS:
+			{
+				struct mpc_config_bus *m=
+					(struct mpc_config_bus *)mpt;
+				MP_bus_info(m);
+				mpt += sizeof(*m);
+				count += sizeof(*m);
+				break;
+			}
+			case MP_IOAPIC:
+			{
+				struct mpc_config_ioapic *m=
+					(struct mpc_config_ioapic *)mpt;
+				MP_ioapic_info(m);
+				mpt+=sizeof(*m);
+				count+=sizeof(*m);
+				break;
+			}
+			case MP_INTSRC:
+			{
+				struct mpc_config_intsrc *m=
+					(struct mpc_config_intsrc *)mpt;
+
+				MP_intsrc_info(m);
+				mpt+=sizeof(*m);
+				count+=sizeof(*m);
+				break;
+			}
+			case MP_LINTSRC:
+			{
+				struct mpc_config_lintsrc *m=
+					(struct mpc_config_lintsrc *)mpt;
+				MP_lintsrc_info(m);
+				mpt+=sizeof(*m);
+				count+=sizeof(*m);
+				break;
+			}
+			default:
+			{
+				count = mpc->mpc_length;
+				break;
+			}
+		}
+		++mpc_record;
+	}
+	clustered_apic_check();
+	if (!num_processors)
+		printk(KERN_ERR "SMP mptable: no processors registered!\n");
+	return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+	unsigned int port;
+
+	port = 0x4d0 + (irq >> 3);
+	return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+	struct mpc_config_intsrc intsrc;
+	int i;
+	int ELCR_fallback = 0;
+
+	intsrc.mpc_type = MP_INTSRC;
+	intsrc.mpc_irqflag = 0;			/* conforming */
+	intsrc.mpc_srcbus = 0;
+	intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+	intsrc.mpc_irqtype = mp_INT;
+
+	/*
+	 *  If true, we have an ISA/PCI system with no IRQ entries
+	 *  in the MP table. To prevent the PCI interrupts from being set up
+	 *  incorrectly, we try to use the ELCR. The sanity check to see if
+	 *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+	 *  never be level sensitive, so we simply see if the ELCR agrees.
+	 *  If it does, we assume it's valid.
+	 */
+	if (mpc_default_type == 5) {
+		printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+		if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+			printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+		else {
+			printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+			ELCR_fallback = 1;
+		}
+	}
+
+	for (i = 0; i < 16; i++) {
+		switch (mpc_default_type) {
+		case 2:
+			if (i == 0 || i == 13)
+				continue;	/* IRQ0 & IRQ13 not connected */
+			/* fall through */
+		default:
+			if (i == 2)
+				continue;	/* IRQ2 is never connected */
+		}
+
+		if (ELCR_fallback) {
+			/*
+			 *  If the ELCR indicates a level-sensitive interrupt, we
+			 *  copy that information over to the MP table in the
+			 *  irqflag field (level sensitive, active high polarity).
+			 */
+			if (ELCR_trigger(i))
+				intsrc.mpc_irqflag = 13;
+			else
+				intsrc.mpc_irqflag = 0;
+		}
+
+		intsrc.mpc_srcbusirq = i;
+		intsrc.mpc_dstirq = i ? i : 2;		/* IRQ0 to INTIN2 */
+		MP_intsrc_info(&intsrc);
+	}
+
+	intsrc.mpc_irqtype = mp_ExtINT;
+	intsrc.mpc_srcbusirq = 0;
+	intsrc.mpc_dstirq = 0;				/* 8259A to INTIN0 */
+	MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+	struct mpc_config_processor processor;
+	struct mpc_config_bus bus;
+	struct mpc_config_ioapic ioapic;
+	struct mpc_config_lintsrc lintsrc;
+	int linttypes[2] = { mp_ExtINT, mp_NMI };
+	int i;
+
+	/*
+	 * local APIC has default address
+	 */
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/*
+	 * 2 CPUs, numbered 0 & 1.
+	 */
+	processor.mpc_type = MP_PROCESSOR;
+	/* Either an integrated APIC or a discrete 82489DX. */
+	processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+	processor.mpc_cpuflag = CPU_ENABLED;
+	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+				   (boot_cpu_data.x86_model << 4) |
+				   boot_cpu_data.x86_mask;
+	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+	processor.mpc_reserved[0] = 0;
+	processor.mpc_reserved[1] = 0;
+	for (i = 0; i < 2; i++) {
+		processor.mpc_apicid = i;
+		MP_processor_info(&processor);
+	}
+
+	bus.mpc_type = MP_BUS;
+	bus.mpc_busid = 0;
+	switch (mpc_default_type) {
+		default:
+			printk("???\n");
+			printk(KERN_ERR "Unknown standard configuration %d\n",
+				mpc_default_type);
+			/* fall through */
+		case 1:
+		case 5:
+			memcpy(bus.mpc_bustype, "ISA   ", 6);
+			break;
+		case 2:
+		case 6:
+		case 3:
+			memcpy(bus.mpc_bustype, "EISA  ", 6);
+			break;
+		case 4:
+		case 7:
+			memcpy(bus.mpc_bustype, "MCA   ", 6);
+	}
+	MP_bus_info(&bus);
+	if (mpc_default_type > 4) {
+		bus.mpc_busid = 1;
+		memcpy(bus.mpc_bustype, "PCI   ", 6);
+		MP_bus_info(&bus);
+	}
+
+	ioapic.mpc_type = MP_IOAPIC;
+	ioapic.mpc_apicid = 2;
+	ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+	ioapic.mpc_flags = MPC_APIC_USABLE;
+	ioapic.mpc_apicaddr = 0xFEC00000;
+	MP_ioapic_info(&ioapic);
+
+	/*
+	 * We set up most of the low 16 IO-APIC pins according to MPS rules.
+	 */
+	construct_default_ioirq_mptable(mpc_default_type);
+
+	lintsrc.mpc_type = MP_LINTSRC;
+	lintsrc.mpc_irqflag = 0;		/* conforming */
+	lintsrc.mpc_srcbusid = 0;
+	lintsrc.mpc_srcbusirq = 0;
+	lintsrc.mpc_destapic = MP_APIC_ALL;
+	for (i = 0; i < 2; i++) {
+		lintsrc.mpc_irqtype = linttypes[i];
+		lintsrc.mpc_destapiclint = i;
+		MP_lintsrc_info(&lintsrc);
+	}
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+	struct intel_mp_floating *mpf = mpf_found;
+
+	/*
+	 * ACPI may be used to obtain the entire SMP configuration or just to 
+	 * enumerate/configure processors (CONFIG_ACPI_BOOT).  Note that 
+	 * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+	 * processors, where MPS only supports physical.
+	 */
+	if (acpi_lapic && acpi_ioapic) {
+		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+		return;
+	}
+	else if (acpi_lapic)
+		printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+	if (mpf->mpf_feature2 & (1<<7)) {
+		printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+		pic_mode = 1;
+	} else {
+		printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+		pic_mode = 0;
+	}
+
+	/*
+	 * Now see if we need to read further.
+	 */
+	if (mpf->mpf_feature1 != 0) {
+
+		printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+		construct_default_ISA_mptable(mpf->mpf_feature1);
+
+	} else if (mpf->mpf_physptr) {
+
+		/*
+		 * Read the physical hardware table.  Anything here will
+		 * override the defaults.
+		 */
+		if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+			smp_found_config = 0;
+			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+			return;
+		}
+		/*
+		 * If there are no explicit MP IRQ entries, then we are
+		 * broken.  We set up most of the low 16 IO-APIC pins to
+		 * ISA defaults and hope it will work.
+		 */
+		if (!mp_irq_entries) {
+			struct mpc_config_bus bus;
+
+			printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+			bus.mpc_type = MP_BUS;
+			bus.mpc_busid = 0;
+			memcpy(bus.mpc_bustype, "ISA   ", 6);
+			MP_bus_info(&bus);
+
+			construct_default_ioirq_mptable(0);
+		}
+
+	} else
+		BUG();
+
+	printk(KERN_INFO "Processors: %d\n", num_processors);
+	/*
+	 * Only use the first configuration found.
+	 */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+	unsigned long *bp = phys_to_virt(base);
+	struct intel_mp_floating *mpf;
+
+	Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+	if (sizeof(*mpf) != 16)
+		printk("Error: MPF size\n");
+
+	while (length > 0) {
+		mpf = (struct intel_mp_floating *)bp;
+		if ((*bp == SMP_MAGIC_IDENT) &&
+			(mpf->mpf_length == 1) &&
+			!mpf_checksum((unsigned char *)bp, 16) &&
+			((mpf->mpf_specification == 1)
+				|| (mpf->mpf_specification == 4)) ) {
+
+			smp_found_config = 1;
+			printk(KERN_INFO "found SMP MP-table at %08lx\n",
+						virt_to_phys(mpf));
+			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+			if (mpf->mpf_physptr) {
+				/*
+				 * We cannot access to MPC table to compute
+				 * table size yet, as only few megabytes from
+				 * the bottom is mapped now.
+				 * PC-9800's MPC table places on the very last
+				 * of physical memory; so that simply reserving
+				 * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+				 * in reserve_bootmem.
+				 */
+				unsigned long size = PAGE_SIZE;
+				unsigned long end = max_low_pfn * PAGE_SIZE;
+				if (mpf->mpf_physptr + size > end)
+					size = end - mpf->mpf_physptr;
+				reserve_bootmem(mpf->mpf_physptr, size);
+			}
+
+			mpf_found = mpf;
+			return 1;
+		}
+		bp += 4;
+		length -= 16;
+	}
+	return 0;
+}
+
+void __init find_smp_config (void)
+{
+	unsigned int address;
+
+	/*
+	 * FIXME: Linux assumes you have 640K of base ram..
+	 * this continues the error...
+	 *
+	 * 1) Scan the bottom 1K for a signature
+	 * 2) Scan the top 1K of base RAM
+	 * 3) Scan the 64K of bios
+	 */
+	if (smp_scan_config(0x0,0x400) ||
+		smp_scan_config(639*0x400,0x400) ||
+			smp_scan_config(0xF0000,0x10000))
+		return;
+	/*
+	 * If it is an SMP machine we should know now, unless the
+	 * configuration is in an EISA/MCA bus machine with an
+	 * extended bios data area.
+	 *
+	 * there is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E, calculate and scan it here.
+	 *
+	 * NOTE! There are Linux loaders that will corrupt the EBDA
+	 * area, and as such this kind of SMP config may be less
+	 * trustworthy, simply because the SMP table may have been
+	 * stomped on during early boot. These loaders are buggy and
+	 * should be fixed.
+	 *
+	 * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+	 */
+
+	address = get_bios_ebda();
+	if (address)
+		smp_scan_config(address, 0x400);
+}
+
+/* --------------------------------------------------------------------------
+                            ACPI-based MP Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+void __init mp_register_lapic_address (
+	u64			address)
+{
+	mp_lapic_addr = (unsigned long) address;
+
+	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+}
+
+
+void __init mp_register_lapic (
+	u8			id, 
+	u8			enabled)
+{
+	struct mpc_config_processor processor;
+	int			boot_cpu = 0;
+	
+	if (MAX_APICS - id <= 0) {
+		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+			id, MAX_APICS);
+		return;
+	}
+
+	if (id == boot_cpu_physical_apicid)
+		boot_cpu = 1;
+
+	processor.mpc_type = MP_PROCESSOR;
+	processor.mpc_apicid = id;
+	processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+	processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+	processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+		(boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+	processor.mpc_reserved[0] = 0;
+	processor.mpc_reserved[1] = 0;
+
+	MP_processor_info(&processor);
+}
+
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
+
+#define MP_ISA_BUS		0
+#define MP_MAX_IOAPIC_PIN	127
+
+static struct mp_ioapic_routing {
+	int			apic_id;
+	int			gsi_base;
+	int			gsi_end;
+	u32			pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+	int			gsi)
+{
+	int			i = 0;
+
+	/* Find the IOAPIC that manages this GSI. */
+	for (i = 0; i < nr_ioapics; i++) {
+		if ((gsi >= mp_ioapic_routing[i].gsi_base)
+			&& (gsi <= mp_ioapic_routing[i].gsi_end))
+			return i;
+	}
+
+	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+	return -1;
+}
+	
+
+void __init mp_register_ioapic (
+	u8			id, 
+	u32			address,
+	u32			gsi_base)
+{
+	int			idx = 0;
+
+	if (nr_ioapics >= MAX_IO_APICS) {
+		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+			"(found %d)\n", MAX_IO_APICS, nr_ioapics);
+		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+	}
+	if (!address) {
+		printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+			" found in MADT table, skipping!\n");
+		return;
+	}
+
+	idx = nr_ioapics++;
+
+	mp_ioapics[idx].mpc_type = MP_IOAPIC;
+	mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].mpc_apicaddr = address;
+
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+	mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+	
+	/* 
+	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+	 */
+	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+	mp_ioapic_routing[idx].gsi_base = gsi_base;
+	mp_ioapic_routing[idx].gsi_end = gsi_base + 
+		io_apic_get_redir_entries(idx);
+
+	printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+		"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+		mp_ioapic_routing[idx].gsi_base,
+		mp_ioapic_routing[idx].gsi_end);
+
+	return;
+}
+
+
+void __init mp_override_legacy_irq (
+	u8			bus_irq,
+	u8			polarity, 
+	u8			trigger, 
+	u32			gsi)
+{
+	struct mpc_config_intsrc intsrc;
+	int			ioapic = -1;
+	int			pin = -1;
+
+	/* 
+	 * Convert 'gsi' to 'ioapic.pin'.
+	 */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return;
+	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+	/*
+	 * TBD: This check is for faulty timer entries, where the override
+	 *      erroneously sets the trigger to level, resulting in a HUGE 
+	 *      increase of timer interrupts!
+	 */
+	if ((bus_irq == 0) && (trigger == 3))
+		trigger = 1;
+
+	intsrc.mpc_type = MP_INTSRC;
+	intsrc.mpc_irqtype = mp_INT;
+	intsrc.mpc_irqflag = (trigger << 2) | polarity;
+	intsrc.mpc_srcbus = MP_ISA_BUS;
+	intsrc.mpc_srcbusirq = bus_irq;				       /* IRQ */
+	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;	   /* APIC ID */
+	intsrc.mpc_dstirq = pin;				    /* INTIN# */
+
+	Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+		intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+		(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+		intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+	mp_irqs[mp_irq_entries] = intsrc;
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!\n");
+
+	return;
+}
+
+int es7000_plat;
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+	struct mpc_config_intsrc intsrc;
+	int			i = 0;
+	int			ioapic = -1;
+
+	/* 
+	 * Fabricate the legacy ISA bus (bus #31).
+	 */
+	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+	/*
+	 * Older generations of ES7000 have no legacy identity mappings
+	 */
+	if (es7000_plat == 1)
+		return;
+
+	/* 
+	 * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+	 */
+	ioapic = mp_find_ioapic(0);
+	if (ioapic < 0)
+		return;
+
+	intsrc.mpc_type = MP_INTSRC;
+	intsrc.mpc_irqflag = 0;					/* Conforming */
+	intsrc.mpc_srcbus = MP_ISA_BUS;
+	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+	/* 
+	 * Use the default configuration for the IRQs 0-15.  Unless
+	 * overriden by (MADT) interrupt source override entries.
+	 */
+	for (i = 0; i < 16; i++) {
+		int idx;
+
+		for (idx = 0; idx < mp_irq_entries; idx++) {
+			struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+			/* Do we already have a mapping for this ISA IRQ? */
+			if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+				break;
+
+			/* Do we already have a mapping for this IOAPIC pin */
+			if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+				(irq->mpc_dstirq == i))
+				break;
+		}
+
+		if (idx != mp_irq_entries) {
+			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+			continue;			/* IRQ already used */
+		}
+
+		intsrc.mpc_irqtype = mp_INT;
+		intsrc.mpc_srcbusirq = i;		   /* Identity mapped */
+		intsrc.mpc_dstirq = i;
+
+		Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+			"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+			(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+			intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+			intsrc.mpc_dstirq);
+
+		mp_irqs[mp_irq_entries] = intsrc;
+		if (++mp_irq_entries == MAX_IRQ_SOURCES)
+			panic("Max # of irq sources exceeded!\n");
+	}
+}
+
+int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
+{
+	int			ioapic = -1;
+	int			ioapic_pin = 0;
+	int			idx, bit = 0;
+
+#ifdef CONFIG_ACPI_BUS
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (acpi_fadt.sci_int == gsi)
+		return gsi;
+#endif
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0) {
+		printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+		return gsi;
+	}
+
+	ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+	if (ioapic_renumber_irq)
+		gsi = ioapic_renumber_irq(ioapic, gsi);
+
+	/* 
+	 * Avoid pin reprogramming.  PRTs typically include entries  
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	bit = ioapic_pin % 32;
+	idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+	if (idx > 3) {
+		printk(KERN_ERR "Invalid reference to IOAPIC pin "
+			"%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+			ioapic_pin);
+		return gsi;
+	}
+	if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+		return gsi;
+	}
+
+	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+		    edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
+		    active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
+	return gsi;
+}
+
+#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
new file mode 100644
index 00000000000..05d9f8f363a
--- /dev/null
+++ b/arch/i386/kernel/msr.c
@@ -0,0 +1,346 @@
+/* ----------------------------------------------------------------------- *
+ *   
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * msr.c
+ *
+ * x86 MSR access device
+ *
+ * This device is accessed by lseek() to the appropriate register number
+ * and then read/write in chunks of 8 bytes.  A larger size means multiple
+ * reads or writes of the same register.
+ *
+ * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
+ * an SMP box will direct the access to CPU %d.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static struct class_simple *msr_class;
+
+/* Note: "err" is handled in a funny way below.  Otherwise one version
+   of gcc or another breaks. */
+
+static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
+{
+	int err;
+
+	asm volatile ("1:	wrmsr\n"
+		      "2:\n"
+		      ".section .fixup,\"ax\"\n"
+		      "3:	movl %4,%0\n"
+		      "	jmp 2b\n"
+		      ".previous\n"
+		      ".section __ex_table,\"a\"\n"
+		      "	.align 4\n" "	.long 1b,3b\n" ".previous":"=&bDS" (err)
+		      :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
+
+	return err;
+}
+
+static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
+{
+	int err;
+
+	asm volatile ("1:	rdmsr\n"
+		      "2:\n"
+		      ".section .fixup,\"ax\"\n"
+		      "3:	movl %4,%0\n"
+		      "	jmp 2b\n"
+		      ".previous\n"
+		      ".section __ex_table,\"a\"\n"
+		      "	.align 4\n"
+		      "	.long 1b,3b\n"
+		      ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
+		      :"c"(reg), "i"(-EIO), "0"(0));
+
+	return err;
+}
+
+#ifdef CONFIG_SMP
+
+struct msr_command {
+	int cpu;
+	int err;
+	u32 reg;
+	u32 data[2];
+};
+
+static void msr_smp_wrmsr(void *cmd_block)
+{
+	struct msr_command *cmd = (struct msr_command *)cmd_block;
+
+	if (cmd->cpu == smp_processor_id())
+		cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
+}
+
+static void msr_smp_rdmsr(void *cmd_block)
+{
+	struct msr_command *cmd = (struct msr_command *)cmd_block;
+
+	if (cmd->cpu == smp_processor_id())
+		cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
+}
+
+static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
+{
+	struct msr_command cmd;
+	int ret;
+
+	preempt_disable();
+	if (cpu == smp_processor_id()) {
+		ret = wrmsr_eio(reg, eax, edx);
+	} else {
+		cmd.cpu = cpu;
+		cmd.reg = reg;
+		cmd.data[0] = eax;
+		cmd.data[1] = edx;
+
+		smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
+		ret = cmd.err;
+	}
+	preempt_enable();
+	return ret;
+}
+
+static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
+{
+	struct msr_command cmd;
+	int ret;
+
+	preempt_disable();
+	if (cpu == smp_processor_id()) {
+		ret = rdmsr_eio(reg, eax, edx);
+	} else {
+		cmd.cpu = cpu;
+		cmd.reg = reg;
+
+		smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
+
+		*eax = cmd.data[0];
+		*edx = cmd.data[1];
+
+		ret = cmd.err;
+	}
+	preempt_enable();
+	return ret;
+}
+
+#else				/* ! CONFIG_SMP */
+
+static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
+{
+	return wrmsr_eio(reg, eax, edx);
+}
+
+static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
+{
+	return rdmsr_eio(reg, eax, edx);
+}
+
+#endif				/* ! CONFIG_SMP */
+
+static loff_t msr_seek(struct file *file, loff_t offset, int orig)
+{
+	loff_t ret = -EINVAL;
+
+	lock_kernel();
+	switch (orig) {
+	case 0:
+		file->f_pos = offset;
+		ret = file->f_pos;
+		break;
+	case 1:
+		file->f_pos += offset;
+		ret = file->f_pos;
+	}
+	unlock_kernel();
+	return ret;
+}
+
+static ssize_t msr_read(struct file *file, char __user * buf,
+			size_t count, loff_t * ppos)
+{
+	u32 __user *tmp = (u32 __user *) buf;
+	u32 data[2];
+	size_t rv;
+	u32 reg = *ppos;
+	int cpu = iminor(file->f_dentry->d_inode);
+	int err;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	for (rv = 0; count; count -= 8) {
+		err = do_rdmsr(cpu, reg, &data[0], &data[1]);
+		if (err)
+			return err;
+		if (copy_to_user(tmp, &data, 8))
+			return -EFAULT;
+		tmp += 2;
+	}
+
+	return ((char __user *)tmp) - buf;
+}
+
+static ssize_t msr_write(struct file *file, const char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	const u32 __user *tmp = (const u32 __user *)buf;
+	u32 data[2];
+	size_t rv;
+	u32 reg = *ppos;
+	int cpu = iminor(file->f_dentry->d_inode);
+	int err;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	for (rv = 0; count; count -= 8) {
+		if (copy_from_user(&data, tmp, 8))
+			return -EFAULT;
+		err = do_wrmsr(cpu, reg, data[0], data[1]);
+		if (err)
+			return err;
+		tmp += 2;
+	}
+
+	return ((char __user *)tmp) - buf;
+}
+
+static int msr_open(struct inode *inode, struct file *file)
+{
+	unsigned int cpu = iminor(file->f_dentry->d_inode);
+	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+
+	if (cpu >= NR_CPUS || !cpu_online(cpu))
+		return -ENXIO;	/* No such CPU */
+	if (!cpu_has(c, X86_FEATURE_MSR))
+		return -EIO;	/* MSR not supported */
+
+	return 0;
+}
+
+/*
+ * File operations we support
+ */
+static struct file_operations msr_fops = {
+	.owner = THIS_MODULE,
+	.llseek = msr_seek,
+	.read = msr_read,
+	.write = msr_write,
+	.open = msr_open,
+};
+
+static int msr_class_simple_device_add(int i)
+{
+	int err = 0;
+	struct class_device *class_err;
+
+	class_err = class_simple_device_add(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i);
+	if (IS_ERR(class_err)) 
+		err = PTR_ERR(class_err);
+	return err;
+}
+
+static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		msr_class_simple_device_add(cpu);
+		break;
+	case CPU_DEAD:
+		class_simple_device_remove(MKDEV(MSR_MAJOR, cpu));	
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block msr_class_cpu_notifier =
+{
+	.notifier_call = msr_class_cpu_callback,
+};
+
+static int __init msr_init(void)
+{
+	int i, err = 0;
+	i = 0;
+
+	if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
+		printk(KERN_ERR "msr: unable to get major %d for msr\n",
+		       MSR_MAJOR);
+		err = -EBUSY;
+		goto out;
+	}
+	msr_class = class_simple_create(THIS_MODULE, "msr");
+	if (IS_ERR(msr_class)) {
+		err = PTR_ERR(msr_class);
+		goto out_chrdev;
+	}
+	for_each_online_cpu(i) {
+		err = msr_class_simple_device_add(i);
+		if (err != 0)
+			goto out_class;
+	}
+	register_cpu_notifier(&msr_class_cpu_notifier);
+
+	err = 0;
+	goto out;
+
+out_class:
+	i = 0;
+	for_each_online_cpu(i)
+		class_simple_device_remove(MKDEV(MSR_MAJOR, i));
+	class_simple_destroy(msr_class);
+out_chrdev:
+	unregister_chrdev(MSR_MAJOR, "cpu/msr");
+out:
+	return err;
+}
+
+static void __exit msr_exit(void)
+{
+	int cpu = 0;
+	for_each_online_cpu(cpu)
+		class_simple_device_remove(MKDEV(MSR_MAJOR, cpu));
+	class_simple_destroy(msr_class);
+	unregister_chrdev(MSR_MAJOR, "cpu/msr");
+	unregister_cpu_notifier(&msr_class_cpu_notifier);
+}
+
+module_init(msr_init);
+module_exit(msr_exit)
+
+MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
+MODULE_DESCRIPTION("x86 generic MSR driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
new file mode 100644
index 00000000000..f5b0c5081bd
--- /dev/null
+++ b/arch/i386/kernel/nmi.c
@@ -0,0 +1,570 @@
+/*
+ *  linux/arch/i386/nmi.c
+ *
+ *  NMI watchdog support on APIC systems
+ *
+ *  Started by Ingo Molnar <mingo@redhat.com>
+ *
+ *  Fixes:
+ *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
+ *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
+ *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
+ *  Pavel Machek and
+ *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/sysdev.h>
+#include <linux/sysctl.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/nmi.h>
+
+#include "mach_traps.h"
+
+unsigned int nmi_watchdog = NMI_NONE;
+extern int unknown_nmi_panic;
+static unsigned int nmi_hz = HZ;
+static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
+static unsigned int nmi_p4_cccr_val;
+extern void show_registers(struct pt_regs *regs);
+
+/*
+ * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
+ * - it may be reserved by some other driver, or not
+ * - when not reserved by some other driver, it may be used for
+ *   the NMI watchdog, or not
+ *
+ * This is maintained separately from nmi_active because the NMI
+ * watchdog may also be driven from the I/O APIC timer.
+ */
+static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
+static unsigned int lapic_nmi_owner;
+#define LAPIC_NMI_WATCHDOG	(1<<0)
+#define LAPIC_NMI_RESERVED	(1<<1)
+
+/* nmi_active:
+ * +1: the lapic NMI watchdog is active, but can be disabled
+ *  0: the lapic NMI watchdog has not been set up, and cannot
+ *     be enabled
+ * -1: the lapic NMI watchdog is disabled, but can be enabled
+ */
+int nmi_active;
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+#define MSR_P4_MISC_ENABLE	0x1A0
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
+#define MSR_P4_PERFCTR0		0x300
+#define MSR_P4_CCCR0		0x360
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+#define MSR_P4_IQ_COUNTER0	0x30C
+#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
+#define P4_NMI_IQ_CCCR0	\
+	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
+	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+
+int __init check_nmi_watchdog (void)
+{
+	unsigned int prev_nmi_count[NR_CPUS];
+	int cpu;
+
+	printk(KERN_INFO "testing NMI watchdog ... ");
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
+	local_irq_enable();
+	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+
+	/* FIXME: Only boot CPU is online at this stage.  Check CPUs
+           as they come up. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+#ifdef CONFIG_SMP
+		/* Check cpu_callin_map here because that is set
+		   after the timer is started. */
+		if (!cpu_isset(cpu, cpu_callin_map))
+			continue;
+#endif
+		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
+			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+			nmi_active = 0;
+			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+			return -1;
+		}
+	}
+	printk("OK.\n");
+
+	/* now that we know it works we can reduce NMI frequency to
+	   something more reasonable; makes a difference in some configs */
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		nmi_hz = 1;
+
+	return 0;
+}
+
+static int __init setup_nmi_watchdog(char *str)
+{
+	int nmi;
+
+	get_option(&str, &nmi);
+
+	if (nmi >= NMI_INVALID)
+		return 0;
+	if (nmi == NMI_NONE)
+		nmi_watchdog = nmi;
+	/*
+	 * If any other x86 CPU has a local APIC, then
+	 * please test the NMI stuff there and send me the
+	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
+	 */
+	if ((nmi == NMI_LOCAL_APIC) &&
+			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
+		nmi_watchdog = nmi;
+	if ((nmi == NMI_LOCAL_APIC) &&
+			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+	  		(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
+		nmi_watchdog = nmi;
+	/*
+	 * We can enable the IO-APIC watchdog
+	 * unconditionally.
+	 */
+	if (nmi == NMI_IO_APIC) {
+		nmi_active = 1;
+		nmi_watchdog = nmi;
+	}
+	return 1;
+}
+
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
+static void disable_lapic_nmi_watchdog(void)
+{
+	if (nmi_active <= 0)
+		return;
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+		break;
+	case X86_VENDOR_INTEL:
+		switch (boot_cpu_data.x86) {
+		case 6:
+			if (boot_cpu_data.x86_model > 0xd)
+				break;
+
+			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+			break;
+		case 15:
+			if (boot_cpu_data.x86_model > 0x3)
+				break;
+
+			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+			break;
+		}
+		break;
+	}
+	nmi_active = -1;
+	/* tell do_nmi() and others that we're not active any more */
+	nmi_watchdog = 0;
+}
+
+static void enable_lapic_nmi_watchdog(void)
+{
+	if (nmi_active < 0) {
+		nmi_watchdog = NMI_LOCAL_APIC;
+		setup_apic_nmi_watchdog();
+	}
+}
+
+int reserve_lapic_nmi(void)
+{
+	unsigned int old_owner;
+
+	spin_lock(&lapic_nmi_owner_lock);
+	old_owner = lapic_nmi_owner;
+	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
+	spin_unlock(&lapic_nmi_owner_lock);
+	if (old_owner & LAPIC_NMI_RESERVED)
+		return -EBUSY;
+	if (old_owner & LAPIC_NMI_WATCHDOG)
+		disable_lapic_nmi_watchdog();
+	return 0;
+}
+
+void release_lapic_nmi(void)
+{
+	unsigned int new_owner;
+
+	spin_lock(&lapic_nmi_owner_lock);
+	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
+	lapic_nmi_owner = new_owner;
+	spin_unlock(&lapic_nmi_owner_lock);
+	if (new_owner & LAPIC_NMI_WATCHDOG)
+		enable_lapic_nmi_watchdog();
+}
+
+void disable_timer_nmi_watchdog(void)
+{
+	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+		return;
+
+	unset_nmi_callback();
+	nmi_active = -1;
+	nmi_watchdog = NMI_NONE;
+}
+
+void enable_timer_nmi_watchdog(void)
+{
+	if (nmi_active < 0) {
+		nmi_watchdog = NMI_IO_APIC;
+		touch_nmi_watchdog();
+		nmi_active = 1;
+	}
+}
+
+#ifdef CONFIG_PM
+
+static int nmi_pm_active; /* nmi_active before suspend */
+
+static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
+{
+	nmi_pm_active = nmi_active;
+	disable_lapic_nmi_watchdog();
+	return 0;
+}
+
+static int lapic_nmi_resume(struct sys_device *dev)
+{
+	if (nmi_pm_active > 0)
+		enable_lapic_nmi_watchdog();
+	return 0;
+}
+
+
+static struct sysdev_class nmi_sysclass = {
+	set_kset_name("lapic_nmi"),
+	.resume		= lapic_nmi_resume,
+	.suspend	= lapic_nmi_suspend,
+};
+
+static struct sys_device device_lapic_nmi = {
+	.id	= 0,
+	.cls	= &nmi_sysclass,
+};
+
+static int __init init_lapic_nmi_sysfs(void)
+{
+	int error;
+
+	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+		return 0;
+
+	error = sysdev_class_register(&nmi_sysclass);
+	if (!error)
+		error = sysdev_register(&device_lapic_nmi);
+	return error;
+}
+/* must come after the local APIC's device_initcall() */
+late_initcall(init_lapic_nmi_sysfs);
+
+#endif	/* CONFIG_PM */
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ * Original code written by Keith Owens.
+ */
+
+static void clear_msr_range(unsigned int base, unsigned int n)
+{
+	unsigned int i;
+
+	for(i = 0; i < n; ++i)
+		wrmsr(base+i, 0, 0);
+}
+
+static void setup_k7_watchdog(void)
+{
+	unsigned int evntsel;
+
+	nmi_perfctr_msr = MSR_K7_PERFCTR0;
+
+	clear_msr_range(MSR_K7_EVNTSEL0, 4);
+	clear_msr_range(MSR_K7_PERFCTR0, 4);
+
+	evntsel = K7_EVNTSEL_INT
+		| K7_EVNTSEL_OS
+		| K7_EVNTSEL_USR
+		| K7_NMI_EVENT;
+
+	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= K7_EVNTSEL_ENABLE;
+	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+}
+
+static void setup_p6_watchdog(void)
+{
+	unsigned int evntsel;
+
+	nmi_perfctr_msr = MSR_P6_PERFCTR0;
+
+	clear_msr_range(MSR_P6_EVNTSEL0, 2);
+	clear_msr_range(MSR_P6_PERFCTR0, 2);
+
+	evntsel = P6_EVNTSEL_INT
+		| P6_EVNTSEL_OS
+		| P6_EVNTSEL_USR
+		| P6_NMI_EVENT;
+
+	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+	Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= P6_EVNTSEL0_ENABLE;
+	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+}
+
+static int setup_p4_watchdog(void)
+{
+	unsigned int misc_enable, dummy;
+
+	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+		return 0;
+
+	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+#ifdef CONFIG_SMP
+	if (smp_num_siblings == 2)
+		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+#endif
+
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
+		clear_msr_range(0x3F1, 2);
+	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
+	   docs doesn't fully define it, so leave it alone for now. */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
+		clear_msr_range(0x3A0, 26);
+		clear_msr_range(0x3BC, 3);
+	} else {
+		clear_msr_range(0x3A0, 31);
+	}
+	clear_msr_range(0x3C0, 6);
+	clear_msr_range(0x3C8, 6);
+	clear_msr_range(0x3E0, 2);
+	clear_msr_range(MSR_P4_CCCR0, 18);
+	clear_msr_range(MSR_P4_PERFCTR0, 18);
+
+	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
+	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
+	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	return 1;
+}
+
+void setup_apic_nmi_watchdog (void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
+			return;
+		setup_k7_watchdog();
+		break;
+	case X86_VENDOR_INTEL:
+		switch (boot_cpu_data.x86) {
+		case 6:
+			if (boot_cpu_data.x86_model > 0xd)
+				return;
+
+			setup_p6_watchdog();
+			break;
+		case 15:
+			if (boot_cpu_data.x86_model > 0x3)
+				return;
+
+			if (!setup_p4_watchdog())
+				return;
+			break;
+		default:
+			return;
+		}
+		break;
+	default:
+		return;
+	}
+	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
+	nmi_active = 1;
+}
+
+/*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check it's local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are generated on every CPU, we only
+ * have to check the current processor.
+ *
+ * since NMIs don't listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up any console locks first ...
+ * [when there will be more tty-related locks, break them up
+ *  here too!]
+ */
+
+static unsigned int
+	last_irq_sums [NR_CPUS],
+	alert_counter [NR_CPUS];
+
+void touch_nmi_watchdog (void)
+{
+	int i;
+
+	/*
+	 * Just reset the alert counters, (other CPUs might be
+	 * spinning on locks we hold):
+	 */
+	for (i = 0; i < NR_CPUS; i++)
+		alert_counter[i] = 0;
+}
+
+extern void die_nmi(struct pt_regs *, const char *msg);
+
+void nmi_watchdog_tick (struct pt_regs * regs)
+{
+
+	/*
+	 * Since current_thread_info()-> is always on the stack, and we
+	 * always switch the stack NMI-atomically, it's safe to use
+	 * smp_processor_id().
+	 */
+	int sum, cpu = smp_processor_id();
+
+	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+
+	if (last_irq_sums[cpu] == sum) {
+		/*
+		 * Ayiee, looks like this CPU is stuck ...
+		 * wait a few IRQs (5 seconds) before doing the oops ...
+		 */
+		alert_counter[cpu]++;
+		if (alert_counter[cpu] == 5*nmi_hz)
+			die_nmi(regs, "NMI Watchdog detected LOCKUP");
+	} else {
+		last_irq_sums[cpu] = sum;
+		alert_counter[cpu] = 0;
+	}
+	if (nmi_perfctr_msr) {
+		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
+			/*
+			 * P4 quirks:
+			 * - An overflown perfctr will assert its interrupt
+			 *   until the OVF flag in its CCCR is cleared.
+			 * - LVTPC is masked on interrupt and must be
+			 *   unmasked by the LVTPC handler.
+			 */
+			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+			apic_write(APIC_LVTPC, APIC_DM_NMI);
+		}
+		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+			/* Only P6 based Pentium M need to re-unmask
+			 * the apic vector but it doesn't hurt
+			 * other P6 variant */
+			apic_write(APIC_LVTPC, APIC_DM_NMI);
+		}
+		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+	}
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
+{
+	unsigned char reason = get_nmi_reason();
+	char buf[64];
+
+	if (!(reason & 0xc0)) {
+		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+		die_nmi(regs, buf);
+	}
+	return 0;
+}
+
+/*
+ * proc handler for /proc/sys/kernel/unknown_nmi_panic
+ */
+int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int old_state;
+
+	old_state = unknown_nmi_panic;
+	proc_dointvec(table, write, file, buffer, length, ppos);
+	if (!!old_state == !!unknown_nmi_panic)
+		return 0;
+
+	if (unknown_nmi_panic) {
+		if (reserve_lapic_nmi() < 0) {
+			unknown_nmi_panic = 0;
+			return -EBUSY;
+		} else {
+			set_nmi_callback(unknown_nmi_panic_callback);
+		}
+	} else {
+		release_lapic_nmi();
+		unset_nmi_callback();
+	}
+	return 0;
+}
+
+#endif
+
+EXPORT_SYMBOL(nmi_active);
+EXPORT_SYMBOL(nmi_watchdog);
+EXPORT_SYMBOL(reserve_lapic_nmi);
+EXPORT_SYMBOL(release_lapic_nmi);
+EXPORT_SYMBOL(disable_timer_nmi_watchdog);
+EXPORT_SYMBOL(enable_timer_nmi_watchdog);
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
new file mode 100644
index 00000000000..e51edf0a656
--- /dev/null
+++ b/arch/i386/kernel/numaq.c
@@ -0,0 +1,79 @@
+/*
+ * Written by: Patricia Gaughen, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <gone@us.ibm.com>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <asm/numaq.h>
+#include <asm/topology.h>
+
+#define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
+
+/*
+ * Function: smp_dump_qct()
+ *
+ * Description: gets memory layout from the quad config table.  This
+ * function also updates node_online_map with the nodes (quads) present.
+ */
+static void __init smp_dump_qct(void)
+{
+	int node;
+	struct eachquadmem *eq;
+	struct sys_cfg_data *scd =
+		(struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
+
+	nodes_clear(node_online_map);
+	for_each_node(node) {
+		if (scd->quads_present31_0 & (1 << node)) {
+			node_set_online(node);
+			eq = &scd->eq[node];
+			/* Convert to pages */
+			node_start_pfn[node] = MB_TO_PAGES(
+				eq->hi_shrd_mem_start - eq->priv_mem_size);
+			node_end_pfn[node] = MB_TO_PAGES(
+				eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
+
+			memory_present(node,
+				node_start_pfn[node], node_end_pfn[node]);
+			node_remap_size[node] = node_memmap_size_bytes(node,
+							node_start_pfn[node],
+							node_end_pfn[node]);
+		}
+	}
+}
+
+/*
+ * Unlike Summit, we don't really care to let the NUMA-Q
+ * fall back to flat mode.  Don't compile for NUMA-Q
+ * unless you really need it!
+ */
+int __init get_memcfg_numaq(void)
+{
+	smp_dump_qct();
+	return 1;
+}
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
new file mode 100644
index 00000000000..4de2e03c7b4
--- /dev/null
+++ b/arch/i386/kernel/pci-dma.c
@@ -0,0 +1,147 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+struct dma_coherent_mem {
+	void		*virt_base;
+	u32		device_base;
+	int		size;
+	int		flags;
+	unsigned long	*bitmap;
+};
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, unsigned int __nocast gfp)
+{
+	void *ret;
+	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+	int order = get_order(size);
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (mem) {
+		int page = bitmap_find_free_region(mem->bitmap, mem->size,
+						     order);
+		if (page >= 0) {
+			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
+			ret = mem->virt_base + (page << PAGE_SHIFT);
+			memset(ret, 0, size);
+			return ret;
+		}
+		if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+			return NULL;
+	}
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+
+	ret = (void *)__get_free_pages(gfp, order);
+
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_phys(ret);
+	}
+	return ret;
+}
+
+void dma_free_coherent(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+	int order = get_order(size);
+	
+	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+
+		bitmap_release_region(mem->bitmap, page, order);
+	} else
+		free_pages((unsigned long)vaddr, order);
+}
+
+int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+				dma_addr_t device_addr, size_t size, int flags)
+{
+	void __iomem *mem_base;
+	int pages = size >> PAGE_SHIFT;
+	int bitmap_size = (pages + 31)/32;
+
+	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
+		goto out;
+	if (!size)
+		goto out;
+	if (dev->dma_mem)
+		goto out;
+
+	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
+
+	mem_base = ioremap(bus_addr, size);
+	if (!mem_base)
+		goto out;
+
+	dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+	if (!dev->dma_mem)
+		goto out;
+	memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
+	dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
+	if (!dev->dma_mem->bitmap)
+		goto free1_out;
+	memset(dev->dma_mem->bitmap, 0, bitmap_size);
+
+	dev->dma_mem->virt_base = mem_base;
+	dev->dma_mem->device_base = device_addr;
+	dev->dma_mem->size = pages;
+	dev->dma_mem->flags = flags;
+
+	if (flags & DMA_MEMORY_MAP)
+		return DMA_MEMORY_MAP;
+
+	return DMA_MEMORY_IO;
+
+ free1_out:
+	kfree(dev->dma_mem->bitmap);
+ out:
+	return 0;
+}
+EXPORT_SYMBOL(dma_declare_coherent_memory);
+
+void dma_release_declared_memory(struct device *dev)
+{
+	struct dma_coherent_mem *mem = dev->dma_mem;
+	
+	if(!mem)
+		return;
+	dev->dma_mem = NULL;
+	iounmap(mem->virt_base);
+	kfree(mem->bitmap);
+	kfree(mem);
+}
+EXPORT_SYMBOL(dma_release_declared_memory);
+
+void *dma_mark_declared_memory_occupied(struct device *dev,
+					dma_addr_t device_addr, size_t size)
+{
+	struct dma_coherent_mem *mem = dev->dma_mem;
+	int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	int pos, err;
+
+	if (!mem)
+		return ERR_PTR(-EINVAL);
+
+	pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
+	err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
+	if (err != 0)
+		return ERR_PTR(err);
+	return mem->virt_base + (pos << PAGE_SHIFT);
+}
+EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
new file mode 100644
index 00000000000..c36fedf40e9
--- /dev/null
+++ b/arch/i386/kernel/process.c
@@ -0,0 +1,848 @@
+/*
+ *  linux/arch/i386/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#ifdef CONFIG_MATH_EMULATION
+#include <asm/math_emu.h>
+#endif
+
+#include <linux/irq.h>
+#include <linux/err.h>
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+static int hlt_counter;
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	return ((unsigned long *)tsk->thread.esp)[3];
+}
+
+/*
+ * Powermanagement idle function, if any..
+ */
+void (*pm_idle)(void);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
+
+void disable_hlt(void)
+{
+	hlt_counter++;
+}
+
+EXPORT_SYMBOL(disable_hlt);
+
+void enable_hlt(void)
+{
+	hlt_counter--;
+}
+
+EXPORT_SYMBOL(enable_hlt);
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+void default_idle(void)
+{
+	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
+		local_irq_disable();
+		if (!need_resched())
+			safe_halt();
+		else
+			local_irq_enable();
+	} else {
+		cpu_relax();
+	}
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle (void)
+{
+	int oldval;
+
+	local_irq_enable();
+
+	/*
+	 * Deal with another CPU just having chosen a thread to
+	 * run here:
+	 */
+	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
+
+	if (!oldval) {
+		set_thread_flag(TIF_POLLING_NRFLAG);
+		asm volatile(
+			"2:"
+			"testl %0, %1;"
+			"rep; nop;"
+			"je 2b;"
+			: : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+	} else {
+		set_need_resched();
+	}
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+	/* endless idle loop with no priority at all */
+	while (1) {
+		while (!need_resched()) {
+			void (*idle)(void);
+
+			if (__get_cpu_var(cpu_idle_state))
+				__get_cpu_var(cpu_idle_state) = 0;
+
+			rmb();
+			idle = pm_idle;
+
+			if (!idle)
+				idle = default_idle;
+
+			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+			idle();
+		}
+		schedule();
+	}
+}
+
+void cpu_idle_wait(void)
+{
+	unsigned int cpu, this_cpu = get_cpu();
+	cpumask_t map;
+
+	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+	put_cpu();
+
+	cpus_clear(map);
+	for_each_online_cpu(cpu) {
+		per_cpu(cpu_idle_state, cpu) = 1;
+		cpu_set(cpu, map);
+	}
+
+	__get_cpu_var(cpu_idle_state) = 0;
+
+	wmb();
+	do {
+		ssleep(1);
+		for_each_online_cpu(cpu) {
+			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+				cpu_clear(cpu, map);
+		}
+		cpus_and(map, map, cpu_online_map);
+	} while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+
+	if (!need_resched()) {
+		set_thread_flag(TIF_POLLING_NRFLAG);
+		do {
+			__monitor((void *)&current_thread_info()->flags, 0, 0);
+			if (need_resched())
+				break;
+			__mwait(0, 0);
+		} while (!need_resched());
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+	}
+}
+
+void __init select_idle_routine(const struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+		printk("monitor/mwait feature present.\n");
+		/*
+		 * Skip, if setup has overridden idle.
+		 * One CPU supports mwait => All CPUs supports mwait
+		 */
+		if (!pm_idle) {
+			printk("using mwait in idle threads.\n");
+			pm_idle = mwait_idle;
+		}
+	}
+}
+
+static int __init idle_setup (char *str)
+{
+	if (!strncmp(str, "poll", 4)) {
+		printk("using polling idle threads.\n");
+		pm_idle = poll_idle;
+#ifdef CONFIG_X86_SMP
+		if (smp_num_siblings > 1)
+			printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
+#endif
+	} else if (!strncmp(str, "halt", 4)) {
+		printk("using halt in idle threads.\n");
+		pm_idle = default_idle;
+	}
+
+	boot_option_idle_override = 1;
+	return 1;
+}
+
+__setup("idle=", idle_setup);
+
+void show_regs(struct pt_regs * regs)
+{
+	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
+	printk("\n");
+	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+	print_symbol("EIP is at %s\n", regs->eip);
+
+	if (regs->xcs & 3)
+		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+	printk(" EFLAGS: %08lx    %s  (%s)\n",
+	       regs->eflags, print_tainted(), system_utsname.release);
+	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+		regs->eax,regs->ebx,regs->ecx,regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+		regs->esi, regs->edi, regs->ebp);
+	printk(" DS: %04x ES: %04x\n",
+		0xffff & regs->xds,0xffff & regs->xes);
+
+	__asm__("movl %%cr0, %0": "=r" (cr0));
+	__asm__("movl %%cr2, %0": "=r" (cr2));
+	__asm__("movl %%cr3, %0": "=r" (cr3));
+	/* This could fault if %cr4 does not exist */
+	__asm__("1: movl %%cr4, %0		\n"
+		"2:				\n"
+		".section __ex_table,\"a\"	\n"
+		".long 1b,2b			\n"
+		".previous			\n"
+		: "=r" (cr4): "0" (0));
+	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+	show_trace(NULL, &regs->esp);
+}
+
+/*
+ * This gets run with %ebx containing the
+ * function to call, and %edx containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+__asm__(".section .text\n"
+	".align 4\n"
+	"kernel_thread_helper:\n\t"
+	"movl %edx,%eax\n\t"
+	"pushl %edx\n\t"
+	"call *%ebx\n\t"
+	"pushl %eax\n\t"
+	"call do_exit\n"
+	".previous");
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.ebx = (unsigned long) fn;
+	regs.edx = (unsigned long) arg;
+
+	regs.xds = __USER_DS;
+	regs.xes = __USER_DS;
+	regs.orig_eax = -1;
+	regs.eip = (unsigned long) kernel_thread_helper;
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	struct task_struct *tsk = current;
+	struct thread_struct *t = &tsk->thread;
+
+	/* The process may have allocated an io port bitmap... nuke it. */
+	if (unlikely(NULL != t->io_bitmap_ptr)) {
+		int cpu = get_cpu();
+		struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+		kfree(t->io_bitmap_ptr);
+		t->io_bitmap_ptr = NULL;
+		/*
+		 * Careful, clear this in the TSS too:
+		 */
+		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
+		t->io_bitmap_max = 0;
+		tss->io_bitmap_owner = NULL;
+		tss->io_bitmap_max = 0;
+		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+		put_cpu();
+	}
+}
+
+void flush_thread(void)
+{
+	struct task_struct *tsk = current;
+
+	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
+	/*
+	 * Forget coprocessor state..
+	 */
+	clear_fpu(tsk);
+	clear_used_math();
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+	if (dead_task->mm) {
+		// temporary debugging check
+		if (dead_task->mm->context.size) {
+			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+					dead_task->comm,
+					dead_task->mm->context.ldt,
+					dead_task->mm->context.size);
+			BUG();
+		}
+	}
+
+	release_vm86_irqs(dead_task);
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	unlazy_fpu(tsk);
+}
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+	unsigned long unused,
+	struct task_struct * p, struct pt_regs * regs)
+{
+	struct pt_regs * childregs;
+	struct task_struct *tsk;
+	int err;
+
+	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+	*childregs = *regs;
+	childregs->eax = 0;
+	childregs->esp = esp;
+
+	p->thread.esp = (unsigned long) childregs;
+	p->thread.esp0 = (unsigned long) (childregs+1);
+
+	p->thread.eip = (unsigned long) ret_from_fork;
+
+	savesegment(fs,p->thread.fs);
+	savesegment(gs,p->thread.gs);
+
+	tsk = current;
+	if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
+		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+		if (!p->thread.io_bitmap_ptr) {
+			p->thread.io_bitmap_max = 0;
+			return -ENOMEM;
+		}
+		memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
+			IO_BITMAP_BYTES);
+	}
+
+	/*
+	 * Set a new TLS for the child thread?
+	 */
+	if (clone_flags & CLONE_SETTLS) {
+		struct desc_struct *desc;
+		struct user_desc info;
+		int idx;
+
+		err = -EFAULT;
+		if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
+			goto out;
+		err = -EINVAL;
+		if (LDT_empty(&info))
+			goto out;
+
+		idx = info.entry_number;
+		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+			goto out;
+
+		desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+
+	err = 0;
+ out:
+	if (err && p->thread.io_bitmap_ptr) {
+		kfree(p->thread.io_bitmap_ptr);
+		p->thread.io_bitmap_max = 0;
+	}
+	return err;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+	int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+	dump->magic = CMAGIC;
+	dump->start_code = 0;
+	dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+	dump->u_dsize -= dump->u_tsize;
+	dump->u_ssize = 0;
+	for (i = 0; i < 8; i++)
+		dump->u_debugreg[i] = current->thread.debugreg[i];  
+
+	if (dump->start_stack < TASK_SIZE)
+		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+	dump->regs.ebx = regs->ebx;
+	dump->regs.ecx = regs->ecx;
+	dump->regs.edx = regs->edx;
+	dump->regs.esi = regs->esi;
+	dump->regs.edi = regs->edi;
+	dump->regs.ebp = regs->ebp;
+	dump->regs.eax = regs->eax;
+	dump->regs.ds = regs->xds;
+	dump->regs.es = regs->xes;
+	savesegment(fs,dump->regs.fs);
+	savesegment(gs,dump->regs.gs);
+	dump->regs.orig_eax = regs->orig_eax;
+	dump->regs.eip = regs->eip;
+	dump->regs.cs = regs->xcs;
+	dump->regs.eflags = regs->eflags;
+	dump->regs.esp = regs->esp;
+	dump->regs.ss = regs->xss;
+
+	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/* 
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+	struct pt_regs ptregs;
+	
+	ptregs = *(struct pt_regs *)
+		((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+	ptregs.xcs &= 0xffff;
+	ptregs.xds &= 0xffff;
+	ptregs.xes &= 0xffff;
+	ptregs.xss &= 0xffff;
+
+	elf_core_copy_regs(regs, &ptregs);
+
+	return 1;
+}
+
+static inline void
+handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
+{
+	if (!next->io_bitmap_ptr) {
+		/*
+		 * Disable the bitmap via an invalid offset. We still cache
+		 * the previous bitmap owner and the IO bitmap contents:
+		 */
+		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+		return;
+	}
+	if (likely(next == tss->io_bitmap_owner)) {
+		/*
+		 * Previous owner of the bitmap (hence the bitmap content)
+		 * matches the next task, we dont have to do anything but
+		 * to set a valid offset in the TSS:
+		 */
+		tss->io_bitmap_base = IO_BITMAP_OFFSET;
+		return;
+	}
+	/*
+	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
+	 * and we let the task to get a GPF in case an I/O instruction
+	 * is performed.  The handler of the GPF will verify that the
+	 * faulting task has a valid I/O bitmap and, it true, does the
+	 * real copy and restart the instruction.  This will save us
+	 * redundant copies when the currently switched task does not
+	 * perform any I/O during its timeslice.
+	 */
+	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+}
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+		__asm__("movl %0,%%db" #register  \
+			: /* no output */ \
+			:"r" (thread->debugreg[register]))
+
+/*
+ *	switch_to(x,yn) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPU's, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is a fairly red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ *
+ * The return value (in %eax) will be the "prev" task after
+ * the task-switch, and shows up in ret_from_fork in entry.S,
+ * for example.
+ */
+struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+	struct thread_struct *prev = &prev_p->thread,
+				 *next = &next_p->thread;
+	int cpu = smp_processor_id();
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+	__unlazy_fpu(prev_p);
+
+	/*
+	 * Reload esp0, LDT and the page table pointer:
+	 */
+	load_esp0(tss, next);
+
+	/*
+	 * Load the per-thread Thread-Local Storage descriptor.
+	 */
+	load_TLS(next, cpu);
+
+	/*
+	 * Save away %fs and %gs. No need to save %es and %ds, as
+	 * those are always kernel segments while inside the kernel.
+	 */
+	asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+	asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+	/*
+	 * Restore %fs and %gs if needed.
+	 */
+	if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
+		loadsegment(fs, next->fs);
+		loadsegment(gs, next->gs);
+	}
+
+	/*
+	 * Now maybe reload the debug registers
+	 */
+	if (unlikely(next->debugreg[7])) {
+		loaddebug(next, 0);
+		loaddebug(next, 1);
+		loaddebug(next, 2);
+		loaddebug(next, 3);
+		/* no 4 and 5 */
+		loaddebug(next, 6);
+		loaddebug(next, 7);
+	}
+
+	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
+		handle_io_bitmap(next, tss);
+
+	return prev_p;
+}
+
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+	return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+}
+
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+	unsigned long clone_flags;
+	unsigned long newsp;
+	int __user *parent_tidptr, *child_tidptr;
+
+	clone_flags = regs.ebx;
+	newsp = regs.ecx;
+	parent_tidptr = (int __user *)regs.edx;
+	child_tidptr = (int __user *)regs.edi;
+	if (!newsp)
+		newsp = regs.esp;
+	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+	int error;
+	char * filename;
+
+	filename = getname((char __user *) regs.ebx);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	error = do_execve(filename,
+			(char __user * __user *) regs.ecx,
+			(char __user * __user *) regs.edx,
+			&regs);
+	if (error == 0) {
+		task_lock(current);
+		current->ptrace &= ~PT_DTRACE;
+		task_unlock(current);
+		/* Make sure we don't return using sysenter.. */
+		set_thread_flag(TIF_IRET);
+	}
+	putname(filename);
+out:
+	return error;
+}
+
+#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
+#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long ebp, esp, eip;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+	stack_page = (unsigned long)p->thread_info;
+	esp = p->thread.esp;
+	if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+		return 0;
+	/* include/asm-i386/system.h:switch_to() pushes ebp last. */
+	ebp = *(unsigned long *) esp;
+	do {
+		if (ebp < stack_page || ebp > top_ebp+stack_page)
+			return 0;
+		eip = *(unsigned long *) (ebp+4);
+		if (!in_sched_functions(eip))
+			return eip;
+		ebp = *(unsigned long *) ebp;
+	} while (count++ < 16);
+	return 0;
+}
+
+/*
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
+ */
+static int get_free_idx(void)
+{
+	struct thread_struct *t = &current->thread;
+	int idx;
+
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx + GDT_ENTRY_TLS_MIN;
+	return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct user_desc info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	load_TLS(t, cpu);
+
+	put_cpu();
+
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
+{
+	struct user_desc info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (randomize_va_space)
+		sp -= get_random_int() % 8192;
+	return sp & ~0xf;
+}
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
new file mode 100644
index 00000000000..b2f17640cef
--- /dev/null
+++ b/arch/i386/kernel/ptrace.c
@@ -0,0 +1,717 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/*
+ * Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/debugreg.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/* set's the trap flag. */
+#define TRAP_FLAG 0x100
+
+/*
+ * Offset of eflags on child stack..
+ */
+#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
+
+static inline struct pt_regs *get_child_regs(struct task_struct *task)
+{
+	void *stack_top = (void *)task->thread.esp0;
+	return stack_top - sizeof(struct pt_regs);
+}
+
+/*
+ * this routine will get a word off of the processes privileged stack. 
+ * the offset is how far from the base addr as stored in the TSS.  
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */   
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+	unsigned char *stack;
+
+	stack = (unsigned char *)task->thread.esp0;
+	stack += offset;
+	return (*((int *)stack));
+}
+
+/*
+ * this routine will put a word on the processes privileged stack. 
+ * the offset is how far from the base addr as stored in the TSS.  
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+	unsigned long data)
+{
+	unsigned char * stack;
+
+	stack = (unsigned char *) task->thread.esp0;
+	stack += offset;
+	*(unsigned long *) stack = data;
+	return 0;
+}
+
+static int putreg(struct task_struct *child,
+	unsigned long regno, unsigned long value)
+{
+	switch (regno >> 2) {
+		case FS:
+			if (value && (value & 3) != 3)
+				return -EIO;
+			child->thread.fs = value;
+			return 0;
+		case GS:
+			if (value && (value & 3) != 3)
+				return -EIO;
+			child->thread.gs = value;
+			return 0;
+		case DS:
+		case ES:
+			if (value && (value & 3) != 3)
+				return -EIO;
+			value &= 0xffff;
+			break;
+		case SS:
+		case CS:
+			if ((value & 3) != 3)
+				return -EIO;
+			value &= 0xffff;
+			break;
+		case EFL:
+			value &= FLAG_MASK;
+			value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
+			break;
+	}
+	if (regno > GS*4)
+		regno -= 2*4;
+	put_stack_long(child, regno - sizeof(struct pt_regs), value);
+	return 0;
+}
+
+static unsigned long getreg(struct task_struct *child,
+	unsigned long regno)
+{
+	unsigned long retval = ~0UL;
+
+	switch (regno >> 2) {
+		case FS:
+			retval = child->thread.fs;
+			break;
+		case GS:
+			retval = child->thread.gs;
+			break;
+		case DS:
+		case ES:
+		case SS:
+		case CS:
+			retval = 0xffff;
+			/* fall through */
+		default:
+			if (regno > GS*4)
+				regno -= 2*4;
+			regno = regno - sizeof(struct pt_regs);
+			retval &= get_stack_long(child, regno);
+	}
+	return retval;
+}
+
+#define LDT_SEGMENT 4
+
+static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
+{
+	unsigned long addr, seg;
+
+	addr = regs->eip;
+	seg = regs->xcs & 0xffff;
+	if (regs->eflags & VM_MASK) {
+		addr = (addr & 0xffff) + (seg << 4);
+		return addr;
+	}
+
+	/*
+	 * We'll assume that the code segments in the GDT
+	 * are all zero-based. That is largely true: the
+	 * TLS segments are used for data, and the PNPBIOS
+	 * and APM bios ones we just ignore here.
+	 */
+	if (seg & LDT_SEGMENT) {
+		u32 *desc;
+		unsigned long base;
+
+		down(&child->mm->context.sem);
+		desc = child->mm->context.ldt + (seg & ~7);
+		base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
+
+		/* 16-bit code segment? */
+		if (!((desc[1] >> 22) & 1))
+			addr &= 0xffff;
+		addr += base;
+		up(&child->mm->context.sem);
+	}
+	return addr;
+}
+
+static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
+{
+	int i, copied;
+	unsigned char opcode[16];
+	unsigned long addr = convert_eip_to_linear(child, regs);
+
+	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+	for (i = 0; i < copied; i++) {
+		switch (opcode[i]) {
+		/* popf */
+		case 0x9d:
+			return 1;
+		/* opcode and address size prefixes */
+		case 0x66: case 0x67:
+			continue;
+		/* irrelevant prefixes (segment overrides and repeats) */
+		case 0x26: case 0x2e:
+		case 0x36: case 0x3e:
+		case 0x64: case 0x65:
+		case 0xf0: case 0xf2: case 0xf3:
+			continue;
+
+		/*
+		 * pushf: NOTE! We should probably not let
+		 * the user see the TF bit being set. But
+		 * it's more pain than it's worth to avoid
+		 * it, and a debugger could emulate this
+		 * all in user space if it _really_ cares.
+		 */
+		case 0x9c:
+		default:
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static void set_singlestep(struct task_struct *child)
+{
+	struct pt_regs *regs = get_child_regs(child);
+
+	/*
+	 * Always set TIF_SINGLESTEP - this guarantees that 
+	 * we single-step system calls etc..  This will also
+	 * cause us to set TF when returning to user mode.
+	 */
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+	/*
+	 * If TF was already set, don't do anything else
+	 */
+	if (regs->eflags & TRAP_FLAG)
+		return;
+
+	/* Set TF on the kernel stack.. */
+	regs->eflags |= TRAP_FLAG;
+
+	/*
+	 * ..but if TF is changed by the instruction we will trace,
+	 * don't mark it as being "us" that set it, so that we
+	 * won't clear it by hand later.
+	 */
+	if (is_at_popf(child, regs))
+		return;
+	
+	child->ptrace |= PT_DTRACE;
+}
+
+static void clear_singlestep(struct task_struct *child)
+{
+	/* Always clear TIF_SINGLESTEP... */
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+	/* But touch TF only if it was set by us.. */
+	if (child->ptrace & PT_DTRACE) {
+		struct pt_regs *regs = get_child_regs(child);
+		regs->eflags &= ~TRAP_FLAG;
+		child->ptrace &= ~PT_DTRACE;
+	}
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{ 
+	clear_singlestep(child);
+}
+
+/*
+ * Perform get_thread_area on behalf of the traced child.
+ */
+static int
+ptrace_get_thread_area(struct task_struct *child,
+		       int idx, struct user_desc __user *user_desc)
+{
+	struct user_desc info;
+	struct desc_struct *desc;
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(user_desc, &info, sizeof(info)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Perform set_thread_area on behalf of the traced child.
+ */
+static int
+ptrace_set_thread_area(struct task_struct *child,
+		       int idx, struct user_desc __user *user_desc)
+{
+	struct user_desc info;
+	struct desc_struct *desc;
+
+	if (copy_from_user(&info, user_desc, sizeof(info)))
+		return -EFAULT;
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+
+	return 0;
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+	struct task_struct *child;
+	struct user * dummy = NULL;
+	int i, ret;
+	unsigned long __user *datap = (unsigned long __user *)data;
+
+	lock_kernel();
+	ret = -EPERM;
+	if (request == PTRACE_TRACEME) {
+		/* are we already being traced? */
+		if (current->ptrace & PT_PTRACED)
+			goto out;
+		ret = security_ptrace(current->parent, current);
+		if (ret)
+			goto out;
+		/* set the ptrace bit in the process flags. */
+		current->ptrace |= PT_PTRACED;
+		ret = 0;
+		goto out;
+	}
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	if (!child)
+		goto out;
+
+	ret = -EPERM;
+	if (pid == 1)		/* you may not mess with init */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		goto out_tsk;
+	}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret < 0)
+		goto out_tsk;
+
+	switch (request) {
+	/* when I and D space are separate, these will need to be fixed. */
+	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	case PTRACE_PEEKDATA: {
+		unsigned long tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, datap);
+		break;
+	}
+
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR: {
+		unsigned long tmp;
+
+		ret = -EIO;
+		if ((addr & 3) || addr < 0 || 
+		    addr > sizeof(struct user) - 3)
+			break;
+
+		tmp = 0;  /* Default return condition */
+		if(addr < FRAME_SIZE*sizeof(long))
+			tmp = getreg(child, addr);
+		if(addr >= (long) &dummy->u_debugreg[0] &&
+		   addr <= (long) &dummy->u_debugreg[7]){
+			addr -= (long) &dummy->u_debugreg[0];
+			addr = addr >> 2;
+			tmp = child->thread.debugreg[addr];
+		}
+		ret = put_user(tmp, datap);
+		break;
+	}
+
+	/* when I and D space are separate, this will have to be fixed. */
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+			break;
+		ret = -EIO;
+		break;
+
+	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+		ret = -EIO;
+		if ((addr & 3) || addr < 0 || 
+		    addr > sizeof(struct user) - 3)
+			break;
+
+		if (addr < FRAME_SIZE*sizeof(long)) {
+			ret = putreg(child, addr, data);
+			break;
+		}
+		/* We need to be very careful here.  We implicitly
+		   want to modify a portion of the task_struct, and we
+		   have to be selective about what portions we allow someone
+		   to modify. */
+
+		  ret = -EIO;
+		  if(addr >= (long) &dummy->u_debugreg[0] &&
+		     addr <= (long) &dummy->u_debugreg[7]){
+
+			  if(addr == (long) &dummy->u_debugreg[4]) break;
+			  if(addr == (long) &dummy->u_debugreg[5]) break;
+			  if(addr < (long) &dummy->u_debugreg[4] &&
+			     ((unsigned long) data) >= TASK_SIZE-3) break;
+			  
+			  /* Sanity-check data. Take one half-byte at once with
+			   * check = (val >> (16 + 4*i)) & 0xf. It contains the
+			   * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
+			   * 2 and 3 are LENi. Given a list of invalid values,
+			   * we do mask |= 1 << invalid_value, so that
+			   * (mask >> check) & 1 is a correct test for invalid
+			   * values.
+			   *
+			   * R/Wi contains the type of the breakpoint /
+			   * watchpoint, LENi contains the length of the watched
+			   * data in the watchpoint case.
+			   *
+			   * The invalid values are:
+			   * - LENi == 0x10 (undefined), so mask |= 0x0f00.
+			   * - R/Wi == 0x10 (break on I/O reads or writes), so
+			   *   mask |= 0x4444.
+			   * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
+			   *   0x1110.
+			   *
+			   * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
+			   *
+			   * See the Intel Manual "System Programming Guide",
+			   * 15.2.4
+			   *
+			   * Note that LENi == 0x10 is defined on x86_64 in long
+			   * mode (i.e. even for 32-bit userspace software, but
+			   * 64-bit kernel), so the x86_64 mask value is 0x5454.
+			   * See the AMD manual no. 24593 (AMD64 System
+			   * Programming)*/
+
+			  if(addr == (long) &dummy->u_debugreg[7]) {
+				  data &= ~DR_CONTROL_RESERVED;
+				  for(i=0; i<4; i++)
+					  if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+						  goto out_tsk;
+			  }
+
+			  addr -= (long) &dummy->u_debugreg;
+			  addr = addr >> 2;
+			  child->thread.debugreg[addr] = data;
+			  ret = 0;
+		  }
+		  break;
+
+	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
+	case PTRACE_CONT:	/* restart after signal. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL) {
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		}
+		else {
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		}
+		child->exit_code = data;
+		/* make sure the single step bit is not set. */
+		clear_singlestep(child);
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+/*
+ * make the child exit.  Best I can do is send it a sigkill. 
+ * perhaps it should be put in the status that it wants to 
+ * exit.
+ */
+	case PTRACE_KILL:
+		ret = 0;
+		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		/* make sure the single step bit is not set. */
+		clear_singlestep(child);
+		wake_up_process(child);
+		break;
+
+	case PTRACE_SINGLESTEP:	/* set the trap flag. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		set_singlestep(child);
+		child->exit_code = data;
+		/* give it a chance to run. */
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+	case PTRACE_DETACH:
+		/* detach a process that was attached. */
+		ret = ptrace_detach(child, data);
+		break;
+
+	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+	  	if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
+			ret = -EIO;
+			break;
+		}
+		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+			__put_user(getreg(child, i), datap);
+			datap++;
+		}
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+		unsigned long tmp;
+	  	if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
+			ret = -EIO;
+			break;
+		}
+		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+			__get_user(tmp, datap);
+			putreg(child, i, tmp);
+			datap++;
+		}
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
+		if (!access_ok(VERIFY_WRITE, datap,
+			       sizeof(struct user_i387_struct))) {
+			ret = -EIO;
+			break;
+		}
+		ret = 0;
+		if (!tsk_used_math(child))
+			init_fpu(child);
+		get_fpregs((struct user_i387_struct __user *)data, child);
+		break;
+	}
+
+	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
+		if (!access_ok(VERIFY_READ, datap,
+			       sizeof(struct user_i387_struct))) {
+			ret = -EIO;
+			break;
+		}
+		set_stopped_child_used_math(child);
+		set_fpregs(child, (struct user_i387_struct __user *)data);
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
+		if (!access_ok(VERIFY_WRITE, datap,
+			       sizeof(struct user_fxsr_struct))) {
+			ret = -EIO;
+			break;
+		}
+		if (!tsk_used_math(child))
+			init_fpu(child);
+		ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
+		break;
+	}
+
+	case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
+		if (!access_ok(VERIFY_READ, datap,
+			       sizeof(struct user_fxsr_struct))) {
+			ret = -EIO;
+			break;
+		}
+		set_stopped_child_used_math(child);
+		ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
+		break;
+	}
+
+	case PTRACE_GET_THREAD_AREA:
+		ret = ptrace_get_thread_area(child, addr,
+					(struct user_desc __user *) data);
+		break;
+
+	case PTRACE_SET_THREAD_AREA:
+		ret = ptrace_set_thread_area(child, addr,
+					(struct user_desc __user *) data);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+out_tsk:
+	put_task_struct(child);
+out:
+	unlock_kernel();
+	return ret;
+}
+
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+{
+	struct siginfo info;
+
+	tsk->thread.trap_no = 1;
+	tsk->thread.error_code = error_code;
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGTRAP;
+	info.si_code = TRAP_BRKPT;
+
+	/* User-mode eip? */
+	info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL;
+
+	/* Send us the fakey SIGTRAP */
+	force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/* notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+ */
+__attribute__((regparm(3)))
+void do_syscall_trace(struct pt_regs *regs, int entryexit)
+{
+	/* do the secure computing check first */
+	secure_computing(regs->orig_eax);
+
+	if (unlikely(current->audit_context)) {
+		if (!entryexit)
+			audit_syscall_entry(current, regs->orig_eax,
+					    regs->ebx, regs->ecx,
+					    regs->edx, regs->esi);
+		else
+			audit_syscall_exit(current, regs->eax);
+	}
+
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	/* Fake a debug trap */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		send_sigtrap(current, regs, 0);
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	/* the 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
new file mode 100644
index 00000000000..aaf89cb2bc5
--- /dev/null
+++ b/arch/i386/kernel/quirks.c
@@ -0,0 +1,52 @@
+/*
+ * This file contains work-arounds for x86 and x86_64 platform bugs.
+ */
+#include <linux/config.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
+
+static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
+{
+	u8 config, rev;
+	u32 word;
+
+	/* BIOS may enable hardware IRQ balancing for
+	 * E7520/E7320/E7525(revision ID 0x9 and below)
+	 * based platforms.
+	 * Disable SW irqbalance/affinity on those platforms.
+	 */
+	pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+	if (rev > 0x9)
+		return;
+
+	printk(KERN_INFO "Intel E7520/7320/7525 detected.");
+
+	/* enable access to config space*/
+	pci_read_config_byte(dev, 0xf4, &config);
+	config |= 0x2;
+	pci_write_config_byte(dev, 0xf4, config);
+
+	/* read xTPR register */
+	raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+
+	if (!(word & (1 << 13))) {
+		printk(KERN_INFO "Disabling irq balancing and affinity\n");
+#ifdef CONFIG_IRQBALANCE
+		irqbalance_disable("");
+#endif
+		noirqdebug_setup("");
+#ifdef CONFIG_PROC_FS
+		no_irq_affinity = 1;
+#endif
+	}
+
+	config &= ~0x2;
+	/* disable access to config space*/
+	pci_write_config_byte(dev, 0xf4, config);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7320_MCH,	quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_intel_irqbalance);
+#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
new file mode 100644
index 00000000000..3d7e994563d
--- /dev/null
+++ b/arch/i386/kernel/reboot.c
@@ -0,0 +1,382 @@
+/*
+ *  linux/arch/i386/kernel/reboot.c
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/efi.h>
+#include <linux/dmi.h>
+#include <asm/uaccess.h>
+#include <asm/apic.h>
+#include "mach_reboot.h"
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+
+static int reboot_mode;
+static int reboot_thru_bios;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c)	((c) >= '0' && (c) <= '9')
+#endif
+static int __init reboot_setup(char *str)
+{
+	while(1) {
+		switch (*str) {
+		case 'w': /* "warm" reboot (no memory testing etc) */
+			reboot_mode = 0x1234;
+			break;
+		case 'c': /* "cold" reboot (with memory testing etc) */
+			reboot_mode = 0x0;
+			break;
+		case 'b': /* "bios" reboot by jumping through the BIOS */
+			reboot_thru_bios = 1;
+			break;
+		case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
+			reboot_thru_bios = 0;
+			break;
+#ifdef CONFIG_SMP
+		case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
+			reboot_smp = 1;
+			if (is_digit(*(str+1))) {
+				reboot_cpu = (int) (*(str+1) - '0');
+				if (is_digit(*(str+2))) 
+					reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
+			}
+				/* we will leave sorting out the final value 
+				when we are ready to reboot, since we might not
+ 				have set up boot_cpu_id or smp_num_cpu */
+			break;
+#endif
+		}
+		if((str = strchr(str,',')) != NULL)
+			str++;
+		else
+			break;
+	}
+	return 1;
+}
+
+__setup("reboot=", reboot_setup);
+
+/*
+ * Reboot options and system auto-detection code provided by
+ * Dell Inc. so their systems "just work". :-)
+ */
+
+/*
+ * Some machines require the "reboot=b"  commandline option, this quirk makes that automatic.
+ */
+static int __init set_bios_reboot(struct dmi_system_id *d)
+{
+	if (!reboot_thru_bios) {
+		reboot_thru_bios = 1;
+		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
+	}
+	return 0;
+}
+
+/*
+ * Some machines require the "reboot=s"  commandline option, this quirk makes that automatic.
+ */
+static int __init set_smp_reboot(struct dmi_system_id *d)
+{
+#ifdef CONFIG_SMP
+	if (!reboot_smp) {
+		reboot_smp = 1;
+		printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Some machines require the "reboot=b,s"  commandline option, this quirk makes that automatic.
+ */
+static int __init set_smp_bios_reboot(struct dmi_system_id *d)
+{
+	set_smp_reboot(d);
+	set_bios_reboot(d);
+	return 0;
+}
+
+static struct dmi_system_id __initdata reboot_dmi_table[] = {
+	{	/* Handle problems with rebooting on Dell 1300's */
+		.callback = set_smp_bios_reboot,
+		.ident = "Dell PowerEdge 1300",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
+		},
+	},
+	{	/* Handle problems with rebooting on Dell 300's */
+		.callback = set_bios_reboot,
+		.ident = "Dell PowerEdge 300",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
+		},
+	},
+	{	/* Handle problems with rebooting on Dell 2400's */
+		.callback = set_bios_reboot,
+		.ident = "Dell PowerEdge 2400",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
+		},
+	},
+	{ }
+};
+
+static int __init reboot_init(void)
+{
+	dmi_check_system(reboot_dmi_table);
+	return 0;
+}
+
+core_initcall(reboot_init);
+
+/* The following code and data reboots the machine by switching to real
+   mode and jumping to the BIOS reset entry point, as if the CPU has
+   really been reset.  The previous version asked the keyboard
+   controller to pulse the CPU reset line, which is more thorough, but
+   doesn't work with at least one type of 486 motherboard.  It is easy
+   to stop this code working; hence the copious comments. */
+
+static unsigned long long
+real_mode_gdt_entries [3] =
+{
+	0x0000000000000000ULL,	/* Null descriptor */
+	0x00009a000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */
+	0x000092000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
+};
+
+static struct
+{
+	unsigned short       size __attribute__ ((packed));
+	unsigned long long * base __attribute__ ((packed));
+}
+real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
+real_mode_idt = { 0x3ff, NULL },
+no_idt = { 0, NULL };
+
+
+/* This is 16-bit protected mode code to disable paging and the cache,
+   switch to real mode and jump to the BIOS reset code.
+
+   The instruction that switches to real mode by writing to CR0 must be
+   followed immediately by a far jump instruction, which set CS to a
+   valid value for real mode, and flushes the prefetch queue to avoid
+   running instructions that have already been decoded in protected
+   mode.
+
+   Clears all the flags except ET, especially PG (paging), PE
+   (protected-mode enable) and TS (task switch for coprocessor state
+   save).  Flushes the TLB after paging has been disabled.  Sets CD and
+   NW, to disable the cache on a 486, and invalidates the cache.  This
+   is more like the state of a 486 after reset.  I don't know if
+   something else should be done for other chips.
+
+   More could be done here to set up the registers as if a CPU reset had
+   occurred; hopefully real BIOSs don't assume much. */
+
+static unsigned char real_mode_switch [] =
+{
+	0x66, 0x0f, 0x20, 0xc0,			/*    movl  %cr0,%eax        */
+	0x66, 0x83, 0xe0, 0x11,			/*    andl  $0x00000011,%eax */
+	0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,	/*    orl   $0x60000000,%eax */
+	0x66, 0x0f, 0x22, 0xc0,			/*    movl  %eax,%cr0        */
+	0x66, 0x0f, 0x22, 0xd8,			/*    movl  %eax,%cr3        */
+	0x66, 0x0f, 0x20, 0xc3,			/*    movl  %cr0,%ebx        */
+	0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60,	/*    andl  $0x60000000,%ebx */
+	0x74, 0x02,				/*    jz    f                */
+	0x0f, 0x09,				/*    wbinvd                 */
+	0x24, 0x10,				/* f: andb  $0x10,al         */
+	0x66, 0x0f, 0x22, 0xc0			/*    movl  %eax,%cr0        */
+};
+static unsigned char jump_to_bios [] =
+{
+	0xea, 0x00, 0x00, 0xff, 0xff		/*    ljmp  $0xffff,$0x0000  */
+};
+
+/*
+ * Switch to real mode and then execute the code
+ * specified by the code and length parameters.
+ * We assume that length will aways be less that 100!
+ */
+void machine_real_restart(unsigned char *code, int length)
+{
+	unsigned long flags;
+
+	local_irq_disable();
+
+	/* Write zero to CMOS register number 0x0f, which the BIOS POST
+	   routine will recognize as telling it to do a proper reboot.  (Well
+	   that's what this book in front of me says -- it may only apply to
+	   the Phoenix BIOS though, it's not clear).  At the same time,
+	   disable NMIs by setting the top bit in the CMOS address register,
+	   as we're about to do peculiar things to the CPU.  I'm not sure if
+	   `outb_p' is needed instead of just `outb'.  Use it to be on the
+	   safe side.  (Yes, CMOS_WRITE does outb_p's. -  Paul G.)
+	 */
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	CMOS_WRITE(0x00, 0x8f);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	/* Remap the kernel at virtual address zero, as well as offset zero
+	   from the kernel segment.  This assumes the kernel segment starts at
+	   virtual address PAGE_OFFSET. */
+
+	memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+		sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+
+	/*
+	 * Use `swapper_pg_dir' as our page directory.
+	 */
+	load_cr3(swapper_pg_dir);
+
+	/* Write 0x1234 to absolute memory location 0x472.  The BIOS reads
+	   this on booting to tell it to "Bypass memory test (also warm
+	   boot)".  This seems like a fairly standard thing that gets set by
+	   REBOOT.COM programs, and the previous reset routine did this
+	   too. */
+
+	*((unsigned short *)0x472) = reboot_mode;
+
+	/* For the switch to real mode, copy some code to low memory.  It has
+	   to be in the first 64k because it is running in 16-bit mode, and it
+	   has to have the same physical and virtual address, because it turns
+	   off paging.  Copy it near the end of the first page, out of the way
+	   of BIOS variables. */
+
+	memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
+		real_mode_switch, sizeof (real_mode_switch));
+	memcpy ((void *) (0x1000 - 100), code, length);
+
+	/* Set up the IDT for real mode. */
+
+	__asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
+
+	/* Set up a GDT from which we can load segment descriptors for real
+	   mode.  The GDT is not used in real mode; it is just needed here to
+	   prepare the descriptors. */
+
+	__asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
+
+	/* Load the data segment registers, and thus the descriptors ready for
+	   real mode.  The base address of each segment is 0x100, 16 times the
+	   selector value being loaded here.  This is so that the segment
+	   registers don't have to be reloaded after switching to real mode:
+	   the values are consistent for real mode operation already. */
+
+	__asm__ __volatile__ ("movl $0x0010,%%eax\n"
+				"\tmovl %%eax,%%ds\n"
+				"\tmovl %%eax,%%es\n"
+				"\tmovl %%eax,%%fs\n"
+				"\tmovl %%eax,%%gs\n"
+				"\tmovl %%eax,%%ss" : : : "eax");
+
+	/* Jump to the 16-bit code that we copied earlier.  It disables paging
+	   and the cache, switches to real mode, and jumps to the BIOS reset
+	   entry point. */
+
+	__asm__ __volatile__ ("ljmp $0x0008,%0"
+				:
+				: "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
+}
+
+void machine_restart(char * __unused)
+{
+#ifdef CONFIG_SMP
+	int cpuid;
+	
+	cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+	if (reboot_smp) {
+
+		/* check to see if reboot_cpu is valid 
+		   if its not, default to the BSP */
+		if ((reboot_cpu == -1) ||  
+		      (reboot_cpu > (NR_CPUS -1))  || 
+		      !physid_isset(cpuid, phys_cpu_present_map))
+			reboot_cpu = boot_cpu_physical_apicid;
+
+		reboot_smp = 0;  /* use this as a flag to only go through this once*/
+		/* re-run this function on the other CPUs
+		   it will fall though this section since we have 
+		   cleared reboot_smp, and do the reboot if it is the
+		   correct CPU, otherwise it halts. */
+		if (reboot_cpu != cpuid)
+			smp_call_function((void *)machine_restart , NULL, 1, 0);
+	}
+
+	/* if reboot_cpu is still -1, then we want a tradional reboot, 
+	   and if we are not running on the reboot_cpu,, halt */
+	if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+		for (;;)
+		__asm__ __volatile__ ("hlt");
+	}
+	/*
+	 * Stop all CPUs and turn off local APICs and the IO-APIC, so
+	 * other OSs see a clean IRQ state.
+	 */
+	smp_send_stop();
+#endif /* CONFIG_SMP */
+
+	lapic_shutdown();
+
+#ifdef CONFIG_X86_IO_APIC
+	disable_IO_APIC();
+#endif
+
+	if (!reboot_thru_bios) {
+		if (efi_enabled) {
+			efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
+			__asm__ __volatile__("lidt %0": :"m" (no_idt));
+			__asm__ __volatile__("int3");
+		}
+		/* rebooting needs to touch the page at absolute addr 0 */
+		*((unsigned short *)__va(0x472)) = reboot_mode;
+		for (;;) {
+			mach_reboot();
+			/* That didn't work - force a triple fault.. */
+			__asm__ __volatile__("lidt %0": :"m" (no_idt));
+			__asm__ __volatile__("int3");
+		}
+	}
+	if (efi_enabled)
+		efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
+
+	machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
+}
+
+EXPORT_SYMBOL(machine_restart);
+
+void machine_halt(void)
+{
+}
+
+EXPORT_SYMBOL(machine_halt);
+
+void machine_power_off(void)
+{
+	lapic_shutdown();
+
+	if (efi_enabled)
+		efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
+	if (pm_power_off)
+		pm_power_off();
+}
+
+EXPORT_SYMBOL(machine_power_off);
+
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c
new file mode 100644
index 00000000000..69e203a0d33
--- /dev/null
+++ b/arch/i386/kernel/scx200.c
@@ -0,0 +1,167 @@
+/* linux/arch/i386/kernel/scx200.c 
+
+   Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
+
+   National Semiconductor SCx200 support. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include <linux/scx200.h>
+
+/* Verify that the configuration block really is there */
+#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
+
+#define NAME "scx200"
+
+MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
+MODULE_DESCRIPTION("NatSemi SCx200 Driver");
+MODULE_LICENSE("GPL");
+
+unsigned scx200_gpio_base = 0;
+long scx200_gpio_shadow[2];
+
+unsigned scx200_cb_base = 0;
+
+static struct pci_device_id scx200_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS)   },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS)   },
+	{ },
+};
+MODULE_DEVICE_TABLE(pci,scx200_tbl);
+
+static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
+
+static struct pci_driver scx200_pci_driver = {
+	.name = "scx200",
+	.id_table = scx200_tbl,
+	.probe = scx200_probe,
+};
+
+static DEFINE_SPINLOCK(scx200_gpio_config_lock);
+
+static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int bank;
+	unsigned base;
+
+	if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
+	    pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
+		base = pci_resource_start(pdev, 0);
+		printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
+
+		if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) {
+			printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
+			return -EBUSY;
+		}
+
+		scx200_gpio_base = base;
+
+		/* read the current values driven on the GPIO signals */
+		for (bank = 0; bank < 2; ++bank)
+			scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
+
+	} else {
+		/* find the base of the Configuration Block */
+		if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
+			scx200_cb_base = SCx200_CB_BASE_FIXED;
+		} else {
+			pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
+			if (scx200_cb_probe(base)) {
+				scx200_cb_base = base;
+			} else {
+				printk(KERN_WARNING NAME ": Configuration Block not found\n");
+				return -ENODEV;
+			}
+		}
+		printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
+	}
+
+	return 0;
+}
+
+u32 scx200_gpio_configure(int index, u32 mask, u32 bits)
+{
+	u32 config, new_config;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scx200_gpio_config_lock, flags);
+
+	outl(index, scx200_gpio_base + 0x20);
+	config = inl(scx200_gpio_base + 0x24);
+
+	new_config = (config & mask) | bits;
+	outl(new_config, scx200_gpio_base + 0x24);
+
+	spin_unlock_irqrestore(&scx200_gpio_config_lock, flags);
+
+	return config;
+}
+
+#if 0
+void scx200_gpio_dump(unsigned index)
+{
+	u32 config = scx200_gpio_configure(index, ~0, 0);
+	printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config);
+	
+	if (config & 1) 
+		printk(" OE"); /* output enabled */
+	else
+		printk(" TS"); /* tristate */
+	if (config & 2) 
+		printk(" PP"); /* push pull */
+	else
+		printk(" OD"); /* open drain */
+	if (config & 4) 
+		printk(" PUE"); /* pull up enabled */
+	else
+		printk(" PUD"); /* pull up disabled */
+	if (config & 8) 
+		printk(" LOCKED"); /* locked */
+	if (config & 16) 
+		printk(" LEVEL"); /* level input */
+	else
+		printk(" EDGE"); /* edge input */
+	if (config & 32) 
+		printk(" HI"); /* trigger on rising edge */
+	else
+		printk(" LO"); /* trigger on falling edge */
+	if (config & 64) 
+		printk(" DEBOUNCE"); /* debounce */
+	printk("\n");
+}
+#endif  /*  0  */
+
+static int __init scx200_init(void)
+{
+	printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
+
+	return pci_module_init(&scx200_pci_driver);
+}
+
+static void __exit scx200_cleanup(void)
+{
+	pci_unregister_driver(&scx200_pci_driver);
+	release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
+}
+
+module_init(scx200_init);
+module_exit(scx200_cleanup);
+
+EXPORT_SYMBOL(scx200_gpio_base);
+EXPORT_SYMBOL(scx200_gpio_shadow);
+EXPORT_SYMBOL(scx200_gpio_configure);
+EXPORT_SYMBOL(scx200_cb_base);
+
+/*
+    Local variables:
+        compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules"
+        c-basic-offset: 8
+    End:
+*/
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
new file mode 100644
index 00000000000..469f496e55c
--- /dev/null
+++ b/arch/i386/kernel/semaphore.c
@@ -0,0 +1,297 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+static fastcall void __attribute_used__  __up(struct semaphore *sem)
+{
+	wake_up(&sem->wait);
+}
+
+static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_UNINTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * the wait_queue_head.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_UNINTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+	tsk->state = TASK_RUNNING;
+}
+
+static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_INTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * With signals pending, this turns into
+		 * the trylock failure case - we won't be
+		 * sleeping, and we* can't get the lock as
+		 * it has contention. Just correct the count
+		 * and exit.
+		 */
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			sem->sleepers = 0;
+			atomic_add(sleepers, &sem->count);
+			break;
+		}
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * wait_queue_head. The "-1" is because we're
+		 * still hoping to get the semaphore.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_INTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	tsk->state = TASK_RUNNING;
+	return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
+{
+	int sleepers;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	sleepers = sem->sleepers + 1;
+	sem->sleepers = 0;
+
+	/*
+	 * Add "everybody else" and us into it. They aren't
+	 * playing, because we own the spinlock in the
+	 * wait_queue_head.
+	 */
+	if (!atomic_add_negative(sleepers, &sem->count)) {
+		wake_up_locked(&sem->wait);
+	}
+
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+	return 1;
+}
+
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %eax contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax whish is either a return
+ * value or just clobbered..
+ */
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed\n"
+"__down_failed:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"pushl %ebp\n\t"
+	"movl  %esp,%ebp\n\t"
+#endif
+	"pushl %edx\n\t"
+	"pushl %ecx\n\t"
+	"call __down\n\t"
+	"popl %ecx\n\t"
+	"popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"movl %ebp,%esp\n\t"
+	"popl %ebp\n\t"
+#endif
+	"ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed_interruptible\n"
+"__down_failed_interruptible:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"pushl %ebp\n\t"
+	"movl  %esp,%ebp\n\t"
+#endif
+	"pushl %edx\n\t"
+	"pushl %ecx\n\t"
+	"call __down_interruptible\n\t"
+	"popl %ecx\n\t"
+	"popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"movl %ebp,%esp\n\t"
+	"popl %ebp\n\t"
+#endif
+	"ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed_trylock\n"
+"__down_failed_trylock:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"pushl %ebp\n\t"
+	"movl  %esp,%ebp\n\t"
+#endif
+	"pushl %edx\n\t"
+	"pushl %ecx\n\t"
+	"call __down_trylock\n\t"
+	"popl %ecx\n\t"
+	"popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+	"movl %ebp,%esp\n\t"
+	"popl %ebp\n\t"
+#endif
+	"ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __up_wakeup\n"
+"__up_wakeup:\n\t"
+	"pushl %edx\n\t"
+	"pushl %ecx\n\t"
+	"call __up\n\t"
+	"popl %ecx\n\t"
+	"popl %edx\n\t"
+	"ret"
+);
+
+/*
+ * rw spinlock fallbacks
+ */
+#if defined(CONFIG_SMP)
+asm(
+".section .sched.text\n"
+".align	4\n"
+".globl	__write_lock_failed\n"
+"__write_lock_failed:\n\t"
+	LOCK "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
+"1:	rep; nop\n\t"
+	"cmpl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+	"jne	1b\n\t"
+	LOCK "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+	"jnz	__write_lock_failed\n\t"
+	"ret"
+);
+
+asm(
+".section .sched.text\n"
+".align	4\n"
+".globl	__read_lock_failed\n"
+"__read_lock_failed:\n\t"
+	LOCK "incl	(%eax)\n"
+"1:	rep; nop\n\t"
+	"cmpl	$1,(%eax)\n\t"
+	"js	1b\n\t"
+	LOCK "decl	(%eax)\n\t"
+	"js	__read_lock_failed\n\t"
+	"ret"
+);
+#endif
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
new file mode 100644
index 00000000000..945ec73163c
--- /dev/null
+++ b/arch/i386/kernel/setup.c
@@ -0,0 +1,1535 @@
+/*
+ *  linux/arch/i386/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *
+ *  Memory region support
+ *	David Parsons <orc@pell.chi.il.us>, July-August 1999
+ *
+ *  Added E820 sanitization routine (removes overlapping memory regions);
+ *  Brian Moyle <bmoyle@mvista.com>, February 2001
+ *
+ * Moved CPU detection code to cpu/${cpu}.c
+ *    Patrick Mochel <mochel@osdl.org>, March 2002
+ *
+ *  Provisions for empty E820 memory regions (reported by certain BIOSes).
+ *  Alex Achenbach <xela@slit.de>, December 2002.
+ *
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/apm_bios.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/console.h>
+#include <linux/mca.h>
+#include <linux/root_dev.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/edd.h>
+#include <linux/nodemask.h>
+#include <video/edid.h>
+#include <asm/e820.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/arch_hooks.h>
+#include <asm/sections.h>
+#include <asm/io_apic.h>
+#include <asm/ist.h>
+#include <asm/io.h>
+#include "setup_arch_pre.h"
+#include <bios_ebda.h>
+
+/* This value is set up by the early boot code to point to the value
+   immediately after the boot time page tables.  It contains a *physical*
+   address, and must not be in the .bss segment! */
+unsigned long init_pg_tables_end __initdata = ~0UL;
+
+int disable_pse __initdata = 0;
+
+/*
+ * Machine setup..
+ */
+
+#ifdef CONFIG_EFI
+int efi_enabled = 0;
+EXPORT_SYMBOL(efi_enabled);
+#endif
+
+/* cpu data as detected by the assembly code in head.S */
+struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+/* common cpu data for all cpus */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+
+unsigned long mmu_cr4_features;
+
+#ifdef	CONFIG_ACPI_INTERPRETER
+	int acpi_disabled = 0;
+#else
+	int acpi_disabled = 1;
+#endif
+EXPORT_SYMBOL(acpi_disabled);
+
+#ifdef	CONFIG_ACPI_BOOT
+int __initdata acpi_force = 0;
+extern acpi_interrupt_flags	acpi_sci_flags;
+#endif
+
+/* for MCA, but anyone else can use it if they want */
+unsigned int machine_id;
+unsigned int machine_submodel_id;
+unsigned int BIOS_revision;
+unsigned int mca_pentium_flag;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/* Boot loader ID as an integer, for the benefit of proc_dointvec */
+int bootloader_type;
+
+/* user-defined highmem size */
+static unsigned int highmem_pages = -1;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct apm_info apm_info;
+struct sys_desc_table_struct {
+	unsigned short length;
+	unsigned char table[0];
+};
+struct edid_info edid_info;
+struct ist_info ist_info;
+struct e820map e820;
+
+extern void early_cpu_init(void);
+extern void dmi_scan_machine(void);
+extern void generic_apic_probe(char *);
+extern int root_mountflags;
+
+unsigned long saved_videomode;
+
+#define RAMDISK_IMAGE_START_MASK  	0x07FF
+#define RAMDISK_PROMPT_FLAG		0x8000
+#define RAMDISK_LOAD_FLAG		0x4000	
+
+static char command_line[COMMAND_LINE_SIZE];
+
+unsigned char __initdata boot_params[PARAM_SIZE];
+
+static struct resource data_resource = {
+	.name	= "Kernel data",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+	.name	= "Kernel code",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource system_rom_resource = {
+	.name	= "System ROM",
+	.start	= 0xf0000,
+	.end	= 0xfffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+	.name	= "Extension ROM",
+	.start	= 0xe0000,
+	.end	= 0xeffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+	.name 	= "Adapter ROM",
+	.start	= 0xc8000,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+#define ADAPTER_ROM_RESOURCES \
+	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+
+static struct resource video_rom_resource = {
+	.name 	= "Video ROM",
+	.start	= 0xc0000,
+	.end	= 0xc7fff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+	.name	= "dma1",
+	.start	= 0x0000,
+	.end	= 0x001f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic1",
+	.start	= 0x0020,
+	.end	= 0x0021,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer0",
+	.start	= 0x0040,
+	.end    = 0x0043,
+	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer1",
+	.start  = 0x0050,
+	.end    = 0x0053,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "keyboard",
+	.start	= 0x0060,
+	.end	= 0x006f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma page reg",
+	.start	= 0x0080,
+	.end	= 0x008f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic2",
+	.start	= 0x00a0,
+	.end	= 0x00a1,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma2",
+	.start	= 0x00c0,
+	.end	= 0x00df,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "fpu",
+	.start	= 0x00f0,
+	.end	= 0x00ff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+} };
+
+#define STANDARD_IO_RESOURCES \
+	(sizeof standard_io_resources / sizeof standard_io_resources[0])
+
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+	unsigned char *p, sum = 0;
+
+	for (p = rom; p < rom + length; p++)
+		sum += *p;
+	return sum == 0;
+}
+
+static void __init probe_roms(void)
+{
+	unsigned long start, length, upper;
+	unsigned char *rom;
+	int	      i;
+
+	/* video rom */
+	upper = adapter_rom_resources[0].start;
+	for (start = video_rom_resource.start; start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		video_rom_resource.start = start;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = rom[2] * 512;
+
+		/* if checksum okay, trust length byte */
+		if (length && romchecksum(rom, length))
+			video_rom_resource.end = start + length - 1;
+
+		request_resource(&iomem_resource, &video_rom_resource);
+		break;
+	}
+
+	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+	if (start < upper)
+		start = upper;
+
+	/* system rom */
+	request_resource(&iomem_resource, &system_rom_resource);
+	upper = system_rom_resource.start;
+
+	/* check for extension rom (ignore length byte!) */
+	rom = isa_bus_to_virt(extension_rom_resource.start);
+	if (romsignature(rom)) {
+		length = extension_rom_resource.end - extension_rom_resource.start + 1;
+		if (romchecksum(rom, length)) {
+			request_resource(&iomem_resource, &extension_rom_resource);
+			upper = extension_rom_resource.start;
+		}
+	}
+
+	/* check for adapter roms on 2k boundaries */
+	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = rom[2] * 512;
+
+		/* but accept any length that fits if checksum okay */
+		if (!length || start + length > upper || !romchecksum(rom, length))
+			continue;
+
+		adapter_rom_resources[i].start = start;
+		adapter_rom_resources[i].end = start + length - 1;
+		request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+		start = adapter_rom_resources[i++].end & ~2047UL;
+	}
+}
+
+static void __init limit_regions(unsigned long long size)
+{
+	unsigned long long current_addr = 0;
+	int i;
+
+	if (efi_enabled) {
+		for (i = 0; i < memmap.nr_map; i++) {
+			current_addr = memmap.map[i].phys_addr +
+				       (memmap.map[i].num_pages << 12);
+			if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+				if (current_addr >= size) {
+					memmap.map[i].num_pages -=
+						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
+					memmap.nr_map = i + 1;
+					return;
+				}
+			}
+		}
+	}
+	for (i = 0; i < e820.nr_map; i++) {
+		if (e820.map[i].type == E820_RAM) {
+			current_addr = e820.map[i].addr + e820.map[i].size;
+			if (current_addr >= size) {
+				e820.map[i].size -= current_addr-size;
+				e820.nr_map = i + 1;
+				return;
+			}
+		}
+	}
+}
+
+static void __init add_memory_region(unsigned long long start,
+                                  unsigned long long size, int type)
+{
+	int x;
+
+	if (!efi_enabled) {
+       		x = e820.nr_map;
+
+		if (x == E820MAX) {
+		    printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+		    return;
+		}
+
+		e820.map[x].addr = start;
+		e820.map[x].size = size;
+		e820.map[x].type = type;
+		e820.nr_map++;
+	}
+} /* add_memory_region */
+
+#define E820_DEBUG	1
+
+static void __init print_memory_map(char *who)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		printk(" %s: %016Lx - %016Lx ", who,
+			e820.map[i].addr,
+			e820.map[i].addr + e820.map[i].size);
+		switch (e820.map[i].type) {
+		case E820_RAM:	printk("(usable)\n");
+				break;
+		case E820_RESERVED:
+				printk("(reserved)\n");
+				break;
+		case E820_ACPI:
+				printk("(ACPI data)\n");
+				break;
+		case E820_NVS:
+				printk("(ACPI NVS)\n");
+				break;
+		default:	printk("type %lu\n", e820.map[i].type);
+				break;
+		}
+	}
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries.  The following 
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+struct change_member {
+	struct e820entry *pbios; /* pointer to original bios entry */
+	unsigned long long addr; /* address for this change point */
+};
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
+
+static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+{
+	struct change_member *change_tmp;
+	unsigned long current_type, last_type;
+	unsigned long long last_addr;
+	int chgidx, still_changing;
+	int overlap_entries;
+	int new_bios_entry;
+	int old_nr, new_nr, chg_nr;
+	int i;
+
+	/*
+		Visually we're performing the following (1,2,3,4 = memory types)...
+
+		Sample memory map (w/overlaps):
+		   ____22__________________
+		   ______________________4_
+		   ____1111________________
+		   _44_____________________
+		   11111111________________
+		   ____________________33__
+		   ___________44___________
+		   __________33333_________
+		   ______________22________
+		   ___________________2222_
+		   _________111111111______
+		   _____________________11_
+		   _________________4______
+
+		Sanitized equivalent (no overlap):
+		   1_______________________
+		   _44_____________________
+		   ___1____________________
+		   ____22__________________
+		   ______11________________
+		   _________1______________
+		   __________3_____________
+		   ___________44___________
+		   _____________33_________
+		   _______________2________
+		   ________________1_______
+		   _________________4______
+		   ___________________2____
+		   ____________________33__
+		   ______________________4_
+	*/
+
+	/* if there's only one memory region, don't bother */
+	if (*pnr_map < 2)
+		return -1;
+
+	old_nr = *pnr_map;
+
+	/* bail out if we find any unreasonable addresses in bios map */
+	for (i=0; i<old_nr; i++)
+		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+			return -1;
+
+	/* create pointers for initial change-point information (for sorting) */
+	for (i=0; i < 2*old_nr; i++)
+		change_point[i] = &change_point_list[i];
+
+	/* record all known change-points (starting and ending addresses),
+	   omitting those that are for empty memory regions */
+	chgidx = 0;
+	for (i=0; i < old_nr; i++)	{
+		if (biosmap[i].size != 0) {
+			change_point[chgidx]->addr = biosmap[i].addr;
+			change_point[chgidx++]->pbios = &biosmap[i];
+			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+			change_point[chgidx++]->pbios = &biosmap[i];
+		}
+	}
+	chg_nr = chgidx;    	/* true number of change-points */
+
+	/* sort change-point list by memory addresses (low -> high) */
+	still_changing = 1;
+	while (still_changing)	{
+		still_changing = 0;
+		for (i=1; i < chg_nr; i++)  {
+			/* if <current_addr> > <last_addr>, swap */
+			/* or, if current=<start_addr> & last=<end_addr>, swap */
+			if ((change_point[i]->addr < change_point[i-1]->addr) ||
+				((change_point[i]->addr == change_point[i-1]->addr) &&
+				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
+				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+			   )
+			{
+				change_tmp = change_point[i];
+				change_point[i] = change_point[i-1];
+				change_point[i-1] = change_tmp;
+				still_changing=1;
+			}
+		}
+	}
+
+	/* create a new bios memory map, removing overlaps */
+	overlap_entries=0;	 /* number of entries in the overlap table */
+	new_bios_entry=0;	 /* index for creating new bios map entries */
+	last_type = 0;		 /* start with undefined memory type */
+	last_addr = 0;		 /* start with 0 as last starting address */
+	/* loop through change-points, determining affect on the new bios map */
+	for (chgidx=0; chgidx < chg_nr; chgidx++)
+	{
+		/* keep track of all overlapping bios entries */
+		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+		{
+			/* add map entry to overlap list (> 1 entry implies an overlap) */
+			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+		}
+		else
+		{
+			/* remove entry from list (order independent, so swap with last) */
+			for (i=0; i<overlap_entries; i++)
+			{
+				if (overlap_list[i] == change_point[chgidx]->pbios)
+					overlap_list[i] = overlap_list[overlap_entries-1];
+			}
+			overlap_entries--;
+		}
+		/* if there are overlapping entries, decide which "type" to use */
+		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+		current_type = 0;
+		for (i=0; i<overlap_entries; i++)
+			if (overlap_list[i]->type > current_type)
+				current_type = overlap_list[i]->type;
+		/* continue building up new bios map based on this information */
+		if (current_type != last_type)	{
+			if (last_type != 0)	 {
+				new_bios[new_bios_entry].size =
+					change_point[chgidx]->addr - last_addr;
+				/* move forward only if the new size was non-zero */
+				if (new_bios[new_bios_entry].size != 0)
+					if (++new_bios_entry >= E820MAX)
+						break; 	/* no more space left for new bios entries */
+			}
+			if (current_type != 0)	{
+				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+				new_bios[new_bios_entry].type = current_type;
+				last_addr=change_point[chgidx]->addr;
+			}
+			last_type = current_type;
+		}
+	}
+	new_nr = new_bios_entry;   /* retain count for new bios entries */
+
+	/* copy new bios mapping into original location */
+	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+	*pnr_map = new_nr;
+
+	return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory.  If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and most every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+{
+	/* Only one memory region (or negative)? Ignore it */
+	if (nr_map < 2)
+		return -1;
+
+	do {
+		unsigned long long start = biosmap->addr;
+		unsigned long long size = biosmap->size;
+		unsigned long long end = start + size;
+		unsigned long type = biosmap->type;
+
+		/* Overflow in 64 bits? Ignore the memory map. */
+		if (start > end)
+			return -1;
+
+		/*
+		 * Some BIOSes claim RAM in the 640k - 1M region.
+		 * Not right. Fix it up.
+		 */
+		if (type == E820_RAM) {
+			if (start < 0x100000ULL && end > 0xA0000ULL) {
+				if (start < 0xA0000ULL)
+					add_memory_region(start, 0xA0000ULL-start, type);
+				if (end <= 0x100000ULL)
+					continue;
+				start = 0x100000ULL;
+				size = end - start;
+			}
+		}
+		add_memory_region(start, size, type);
+	} while (biosmap++,--nr_map);
+	return 0;
+}
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+struct edd edd;
+#ifdef CONFIG_EDD_MODULE
+EXPORT_SYMBOL(edd);
+#endif
+/**
+ * copy_edd() - Copy the BIOS EDD information
+ *              from boot_params into a safe place.
+ *
+ */
+static inline void copy_edd(void)
+{
+     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
+     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
+     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
+     edd.edd_info_nr = EDD_NR;
+}
+#else
+static inline void copy_edd(void)
+{
+}
+#endif
+
+/*
+ * Do NOT EVER look at the BIOS memory size location.
+ * It does not work on many machines.
+ */
+#define LOWMEMSIZE()	(0x9f000)
+
+static void __init parse_cmdline_early (char ** cmdline_p)
+{
+	char c = ' ', *to = command_line, *from = saved_command_line;
+	int len = 0;
+	int userdef = 0;
+
+	/* Save unparsed command line copy for /proc/cmdline */
+	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+	for (;;) {
+		if (c != ' ')
+			goto next_char;
+		/*
+		 * "mem=nopentium" disables the 4MB page tables.
+		 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+		 * to <mem>, overriding the bios size.
+		 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+		 * <start> to <start>+<mem>, overriding the bios size.
+		 *
+		 * HPA tells me bootloaders need to parse mem=, so no new
+		 * option should be mem=  [also see Documentation/i386/boot.txt]
+		 */
+		if (!memcmp(from, "mem=", 4)) {
+			if (to != command_line)
+				to--;
+			if (!memcmp(from+4, "nopentium", 9)) {
+				from += 9+4;
+				clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+				disable_pse = 1;
+			} else {
+				/* If the user specifies memory size, we
+				 * limit the BIOS-provided memory map to
+				 * that size. exactmap can be used to specify
+				 * the exact map. mem=number can be used to
+				 * trim the existing memory map.
+				 */
+				unsigned long long mem_size;
+ 
+				mem_size = memparse(from+4, &from);
+				limit_regions(mem_size);
+				userdef=1;
+			}
+		}
+
+		else if (!memcmp(from, "memmap=", 7)) {
+			if (to != command_line)
+				to--;
+			if (!memcmp(from+7, "exactmap", 8)) {
+				from += 8+7;
+				e820.nr_map = 0;
+				userdef = 1;
+			} else {
+				/* If the user specifies memory size, we
+				 * limit the BIOS-provided memory map to
+				 * that size. exactmap can be used to specify
+				 * the exact map. mem=number can be used to
+				 * trim the existing memory map.
+				 */
+				unsigned long long start_at, mem_size;
+ 
+				mem_size = memparse(from+7, &from);
+				if (*from == '@') {
+					start_at = memparse(from+1, &from);
+					add_memory_region(start_at, mem_size, E820_RAM);
+				} else if (*from == '#') {
+					start_at = memparse(from+1, &from);
+					add_memory_region(start_at, mem_size, E820_ACPI);
+				} else if (*from == '$') {
+					start_at = memparse(from+1, &from);
+					add_memory_region(start_at, mem_size, E820_RESERVED);
+				} else {
+					limit_regions(mem_size);
+					userdef=1;
+				}
+			}
+		}
+
+		else if (!memcmp(from, "noexec=", 7))
+			noexec_setup(from + 7);
+
+
+#ifdef  CONFIG_X86_SMP
+		/*
+		 * If the BIOS enumerates physical processors before logical,
+		 * maxcpus=N at enumeration-time can be used to disable HT.
+		 */
+		else if (!memcmp(from, "maxcpus=", 8)) {
+			extern unsigned int maxcpus;
+
+			maxcpus = simple_strtoul(from + 8, NULL, 0);
+		}
+#endif
+
+#ifdef CONFIG_ACPI_BOOT
+		/* "acpi=off" disables both ACPI table parsing and interpreter */
+		else if (!memcmp(from, "acpi=off", 8)) {
+			disable_acpi();
+		}
+
+		/* acpi=force to over-ride black-list */
+		else if (!memcmp(from, "acpi=force", 10)) {
+			acpi_force = 1;
+			acpi_ht = 1;
+			acpi_disabled = 0;
+		}
+
+		/* acpi=strict disables out-of-spec workarounds */
+		else if (!memcmp(from, "acpi=strict", 11)) {
+			acpi_strict = 1;
+		}
+
+		/* Limit ACPI just to boot-time to enable HT */
+		else if (!memcmp(from, "acpi=ht", 7)) {
+			if (!acpi_force)
+				disable_acpi();
+			acpi_ht = 1;
+		}
+		
+		/* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
+		else if (!memcmp(from, "pci=noacpi", 10)) {
+			acpi_disable_pci();
+		}
+		/* "acpi=noirq" disables ACPI interrupt routing */
+		else if (!memcmp(from, "acpi=noirq", 10)) {
+			acpi_noirq_set();
+		}
+
+		else if (!memcmp(from, "acpi_sci=edge", 13))
+			acpi_sci_flags.trigger =  1;
+
+		else if (!memcmp(from, "acpi_sci=level", 14))
+			acpi_sci_flags.trigger = 3;
+
+		else if (!memcmp(from, "acpi_sci=high", 13))
+			acpi_sci_flags.polarity = 1;
+
+		else if (!memcmp(from, "acpi_sci=low", 12))
+			acpi_sci_flags.polarity = 3;
+
+#ifdef CONFIG_X86_IO_APIC
+		else if (!memcmp(from, "acpi_skip_timer_override", 24))
+			acpi_skip_timer_override = 1;
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+		/* disable IO-APIC */
+		else if (!memcmp(from, "noapic", 6))
+			disable_ioapic_setup();
+#endif /* CONFIG_X86_LOCAL_APIC */
+#endif /* CONFIG_ACPI_BOOT */
+
+		/*
+		 * highmem=size forces highmem to be exactly 'size' bytes.
+		 * This works even on boxes that have no highmem otherwise.
+		 * This also works to reduce highmem size on bigger boxes.
+		 */
+		else if (!memcmp(from, "highmem=", 8))
+			highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
+	
+		/*
+		 * vmalloc=size forces the vmalloc area to be exactly 'size'
+		 * bytes. This can be used to increase (or decrease) the
+		 * vmalloc area - the default is 128m.
+		 */
+		else if (!memcmp(from, "vmalloc=", 8))
+			__VMALLOC_RESERVE = memparse(from+8, &from);
+
+	next_char:
+		c = *(from++);
+		if (!c)
+			break;
+		if (COMMAND_LINE_SIZE <= ++len)
+			break;
+		*(to++) = c;
+	}
+	*to = '\0';
+	*cmdline_p = command_line;
+	if (userdef) {
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		print_memory_map("user");
+	}
+}
+
+/*
+ * Callback for efi_memory_walk.
+ */
+static int __init
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long *max_pfn = arg, pfn;
+
+	if (start < end) {
+		pfn = PFN_UP(end -1);
+		if (pfn > *max_pfn)
+			*max_pfn = pfn;
+	}
+	return 0;
+}
+
+
+/*
+ * Find the highest page frame number we have available
+ */
+void __init find_max_pfn(void)
+{
+	int i;
+
+	max_pfn = 0;
+	if (efi_enabled) {
+		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
+		return;
+	}
+
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long start, end;
+		/* RAM? */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		start = PFN_UP(e820.map[i].addr);
+		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+		if (start >= end)
+			continue;
+		if (end > max_pfn)
+			max_pfn = end;
+	}
+}
+
+/*
+ * Determine low and high memory ranges:
+ */
+unsigned long __init find_max_low_pfn(void)
+{
+	unsigned long max_low_pfn;
+
+	max_low_pfn = max_pfn;
+	if (max_low_pfn > MAXMEM_PFN) {
+		if (highmem_pages == -1)
+			highmem_pages = max_pfn - MAXMEM_PFN;
+		if (highmem_pages + MAXMEM_PFN < max_pfn)
+			max_pfn = MAXMEM_PFN + highmem_pages;
+		if (highmem_pages + MAXMEM_PFN > max_pfn) {
+			printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
+			highmem_pages = 0;
+		}
+		max_low_pfn = MAXMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+		/* Maximum memory usable is what is directly addressable */
+		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+					MAXMEM>>20);
+		if (max_pfn > MAX_NONPAE_PFN)
+			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+		else
+			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+		max_pfn = MAXMEM_PFN;
+#else /* !CONFIG_HIGHMEM */
+#ifndef CONFIG_X86_PAE
+		if (max_pfn > MAX_NONPAE_PFN) {
+			max_pfn = MAX_NONPAE_PFN;
+			printk(KERN_WARNING "Warning only 4GB will be used.\n");
+			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+		}
+#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM */
+	} else {
+		if (highmem_pages == -1)
+			highmem_pages = 0;
+#ifdef CONFIG_HIGHMEM
+		if (highmem_pages >= max_pfn) {
+			printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
+			highmem_pages = 0;
+		}
+		if (highmem_pages) {
+			if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
+				printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
+				highmem_pages = 0;
+			}
+			max_low_pfn -= highmem_pages;
+		}
+#else
+		if (highmem_pages)
+			printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
+#endif
+	}
+	return max_low_pfn;
+}
+
+/*
+ * Free all available memory for boot time allocation.  Used
+ * as a callback function by efi_memory_walk()
+ */
+
+static int __init
+free_available_memory(unsigned long start, unsigned long end, void *arg)
+{
+	/* check max_low_pfn */
+	if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+		return 0;
+	if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
+		end = (max_low_pfn + 1) << PAGE_SHIFT;
+	if (start < end)
+		free_bootmem(start, end - start);
+
+	return 0;
+}
+/*
+ * Register fully available low RAM pages with the bootmem allocator.
+ */
+static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
+{
+	int i;
+
+	if (efi_enabled) {
+		efi_memmap_walk(free_available_memory, NULL);
+		return;
+	}
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long curr_pfn, last_pfn, size;
+		/*
+		 * Reserve usable low memory
+		 */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		/*
+		 * We are rounding up the start address of usable memory:
+		 */
+		curr_pfn = PFN_UP(e820.map[i].addr);
+		if (curr_pfn >= max_low_pfn)
+			continue;
+		/*
+		 * ... and at the end of the usable range downwards:
+		 */
+		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+		if (last_pfn > max_low_pfn)
+			last_pfn = max_low_pfn;
+
+		/*
+		 * .. finally, did all the rounding and playing
+		 * around just make the area go away?
+		 */
+		if (last_pfn <= curr_pfn)
+			continue;
+
+		size = last_pfn - curr_pfn;
+		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+	}
+}
+
+/*
+ * workaround for Dell systems that neglect to reserve EBDA
+ */
+static void __init reserve_ebda_region(void)
+{
+	unsigned int addr;
+	addr = get_bios_ebda();
+	if (addr)
+		reserve_bootmem(addr, PAGE_SIZE);	
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+void __init setup_bootmem_allocator(void);
+static unsigned long __init setup_memory(void)
+{
+	/*
+	 * partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	min_low_pfn = PFN_UP(init_pg_tables_end);
+
+	find_max_pfn();
+
+	max_low_pfn = find_max_low_pfn();
+
+#ifdef CONFIG_HIGHMEM
+	highstart_pfn = highend_pfn = max_pfn;
+	if (max_pfn > max_low_pfn) {
+		highstart_pfn = max_low_pfn;
+	}
+	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+		pages_to_mb(highend_pfn - highstart_pfn));
+#endif
+	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+			pages_to_mb(max_low_pfn));
+
+	setup_bootmem_allocator();
+
+	return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+	unsigned int max_dma, low;
+
+	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	low = max_low_pfn;
+
+	if (low < max_dma)
+		zones_size[ZONE_DMA] = low;
+	else {
+		zones_size[ZONE_DMA] = max_dma;
+		zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+#endif
+	}
+	free_area_init(zones_size);
+}
+#else
+extern unsigned long setup_memory(void);
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+void __init setup_bootmem_allocator(void)
+{
+	unsigned long bootmap_size;
+	/*
+	 * Initialize the boot-time allocator (with low memory only):
+	 */
+	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
+
+	register_bootmem_low_pages(max_low_pfn);
+
+	/*
+	 * Reserve the bootmem bitmap itself as well. We do this in two
+	 * steps (first step was init_bootmem()) because this catches
+	 * the (very unlikely) case of us accidentally initializing the
+	 * bootmem allocator with an invalid RAM area.
+	 */
+	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
+			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+
+	/*
+	 * reserve physical page 0 - it's a special BIOS page on many boxes,
+	 * enabling clean reboots, SMP operation, laptop functions.
+	 */
+	reserve_bootmem(0, PAGE_SIZE);
+
+	/* reserve EBDA region, it's a 4K region */
+	reserve_ebda_region();
+
+    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
+       PCI prefetch into it (errata #56). Usually the page is reserved anyways,
+       unless you have no PS/2 mouse plugged in. */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+	    boot_cpu_data.x86 == 6)
+	     reserve_bootmem(0xa0000 - 4096, 4096);
+
+#ifdef CONFIG_SMP
+	/*
+	 * But first pinch a few for the stack/trampoline stuff
+	 * FIXME: Don't need the extra page at 4K, but need to fix
+	 * trampoline before removing it. (see the GDT stuff)
+	 */
+	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+	/*
+	 * Reserve low memory region for sleep support.
+	 */
+	acpi_reserve_bootmem();
+#endif
+#ifdef CONFIG_X86_FIND_SMP_CONFIG
+	/*
+	 * Find and reserve possible boot-time SMP configuration:
+	 */
+	find_smp_config();
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (LOADER_TYPE && INITRD_START) {
+		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
+			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			initrd_start =
+				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+			initrd_end = initrd_start+INITRD_SIZE;
+		}
+		else {
+			printk(KERN_ERR "initrd extends beyond end of memory "
+			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+			    INITRD_START + INITRD_SIZE,
+			    max_low_pfn << PAGE_SHIFT);
+			initrd_start = 0;
+		}
+	}
+#endif
+}
+
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem.  node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+void __init remapped_pgdat_init(void)
+{
+	int nid;
+
+	for_each_online_node(nid) {
+		if (nid != 0)
+			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+	}
+}
+
+/*
+ * Request address space for all standard RAM and ROM resources
+ * and also for regions reported as reserved by the e820.
+ */
+static void __init
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+{
+	int i;
+
+	probe_roms();
+	for (i = 0; i < e820.nr_map; i++) {
+		struct resource *res;
+		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+			continue;
+		res = alloc_bootmem_low(sizeof(struct resource));
+		switch (e820.map[i].type) {
+		case E820_RAM:	res->name = "System RAM"; break;
+		case E820_ACPI:	res->name = "ACPI Tables"; break;
+		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
+		default:	res->name = "reserved";
+		}
+		res->start = e820.map[i].addr;
+		res->end = res->start + e820.map[i].size - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		request_resource(&iomem_resource, res);
+		if (e820.map[i].type == E820_RAM) {
+			/*
+			 *  We don't know which RAM region contains kernel data,
+			 *  so we try it repeatedly and let the resource manager
+			 *  test it.
+			 */
+			request_resource(res, code_resource);
+			request_resource(res, data_resource);
+		}
+	}
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(void)
+{
+	unsigned long gapstart, gapsize;
+	unsigned long long last;
+	int	      i;
+
+	if (efi_enabled)
+		efi_initialize_iomem_resources(&code_resource, &data_resource);
+	else
+		legacy_init_iomem_resources(&code_resource, &data_resource);
+
+	/* EFI systems may still have VGA */
+	request_resource(&iomem_resource, &video_ram_resource);
+
+	/* request I/O space for devices used on all i[345]86 PCs */
+	for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+		request_resource(&ioport_resource, &standard_io_resources[i]);
+
+	/*
+	 * Search for the bigest gap in the low 32 bits of the e820
+	 * memory space.
+	 */
+	last = 0x100000000ull;
+	gapstart = 0x10000000;
+	gapsize = 0x400000;
+	i = e820.nr_map;
+	while (--i >= 0) {
+		unsigned long long start = e820.map[i].addr;
+		unsigned long long end = start + e820.map[i].size;
+
+		/*
+		 * Since "last" is at most 4GB, we know we'll
+		 * fit in 32 bits if this condition is true
+		 */
+		if (last > end) {
+			unsigned long gap = last - end;
+
+			if (gap > gapsize) {
+				gapsize = gap;
+				gapstart = end;
+			}
+		}
+		if (start < last)
+			last = start;
+	}
+
+	/*
+	 * Start allocating dynamic PCI memory a bit into the gap,
+	 * aligned up to the nearest megabyte.
+	 *
+	 * Question: should we try to pad it up a bit (do something
+	 * like " + (gapsize >> 3)" in there too?). We now have the
+	 * technology.
+	 */
+	pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+
+	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
+		pci_mem_start, gapstart, gapsize);
+}
+
+/* Use inline assembly to define this because the nops are defined 
+   as inline assembly strings in the include files and we cannot 
+   get them easily into strings. */
+asm("\t.data\nintelnops: " 
+    GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
+    GENERIC_NOP7 GENERIC_NOP8); 
+asm("\t.data\nk8nops: " 
+    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+    K8_NOP7 K8_NOP8); 
+asm("\t.data\nk7nops: " 
+    K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+    K7_NOP7 K7_NOP8); 
+    
+extern unsigned char intelnops[], k8nops[], k7nops[];
+static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
+     NULL,
+     intelnops,
+     intelnops + 1,
+     intelnops + 1 + 2,
+     intelnops + 1 + 2 + 3,
+     intelnops + 1 + 2 + 3 + 4,
+     intelnops + 1 + 2 + 3 + 4 + 5,
+     intelnops + 1 + 2 + 3 + 4 + 5 + 6,
+     intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+}; 
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
+     NULL,
+     k8nops,
+     k8nops + 1,
+     k8nops + 1 + 2,
+     k8nops + 1 + 2 + 3,
+     k8nops + 1 + 2 + 3 + 4,
+     k8nops + 1 + 2 + 3 + 4 + 5,
+     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+}; 
+static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
+     NULL,
+     k7nops,
+     k7nops + 1,
+     k7nops + 1 + 2,
+     k7nops + 1 + 2 + 3,
+     k7nops + 1 + 2 + 3 + 4,
+     k7nops + 1 + 2 + 3 + 4 + 5,
+     k7nops + 1 + 2 + 3 + 4 + 5 + 6,
+     k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+}; 
+static struct nop { 
+     int cpuid; 
+     unsigned char **noptable; 
+} noptypes[] = { 
+     { X86_FEATURE_K8, k8_nops }, 
+     { X86_FEATURE_K7, k7_nops }, 
+     { -1, NULL }
+}; 
+
+/* Replace instructions with better alternatives for this CPU type.
+
+   This runs before SMP is initialized to avoid SMP problems with
+   self modifying code. This implies that assymetric systems where
+   APs have less capabilities than the boot processor are not handled. 
+   In this case boot with "noreplacement". */ 
+void apply_alternatives(void *start, void *end) 
+{ 
+	struct alt_instr *a; 
+	int diff, i, k;
+        unsigned char **noptable = intel_nops; 
+	for (i = 0; noptypes[i].cpuid >= 0; i++) { 
+		if (boot_cpu_has(noptypes[i].cpuid)) { 
+			noptable = noptypes[i].noptable;
+			break;
+		}
+	} 
+	for (a = start; (void *)a < end; a++) { 
+		if (!boot_cpu_has(a->cpuid))
+			continue;
+		BUG_ON(a->replacementlen > a->instrlen); 
+		memcpy(a->instr, a->replacement, a->replacementlen); 
+		diff = a->instrlen - a->replacementlen; 
+		/* Pad the rest with nops */
+		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+			k = diff;
+			if (k > ASM_NOP_MAX)
+				k = ASM_NOP_MAX;
+			memcpy(a->instr + i, noptable[k], k); 
+		} 
+	}
+} 
+
+static int no_replacement __initdata = 0; 
+ 
+void __init alternative_instructions(void)
+{
+	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+	if (no_replacement) 
+		return;
+	apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
+
+static int __init noreplacement_setup(char *s)
+{ 
+     no_replacement = 1; 
+     return 0; 
+} 
+
+__setup("noreplacement", noreplacement_setup); 
+
+static char * __init machine_specific_memory_setup(void);
+
+#ifdef CONFIG_MCA
+static void set_mca_bus(int x)
+{
+	MCA_bus = x;
+}
+#else
+static void set_mca_bus(int x) { }
+#endif
+
+/*
+ * Determine if we were loaded by an EFI loader.  If so, then we have also been
+ * passed the efi memmap, systab, etc., so we should use these data structures
+ * for initialization.  Note, the efi init code path is determined by the
+ * global efi_enabled. This allows the same kernel image to be used on existing
+ * systems (with a traditional BIOS) as well as on EFI systems.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long max_low_pfn;
+
+	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+	pre_setup_arch_hook();
+	early_cpu_init();
+
+	/*
+	 * FIXME: This isn't an official loader_type right
+	 * now but does currently work with elilo.
+	 * If we were configured as an EFI kernel, check to make
+	 * sure that we were loaded correctly from elilo and that
+	 * the system table is valid.  If not, then initialize normally.
+	 */
+#ifdef CONFIG_EFI
+	if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
+		efi_enabled = 1;
+#endif
+
+ 	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+ 	drive_info = DRIVE_INFO;
+ 	screen_info = SCREEN_INFO;
+	edid_info = EDID_INFO;
+	apm_info.bios = APM_BIOS_INFO;
+	ist_info = IST_INFO;
+	saved_videomode = VIDEO_MODE;
+	if( SYS_DESC_TABLE.length != 0 ) {
+		set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
+		machine_id = SYS_DESC_TABLE.table[0];
+		machine_submodel_id = SYS_DESC_TABLE.table[1];
+		BIOS_revision = SYS_DESC_TABLE.table[2];
+	}
+	bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+	ARCH_SETUP
+	if (efi_enabled)
+		efi_init();
+	else {
+		printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+		print_memory_map(machine_specific_memory_setup());
+	}
+
+	copy_edd();
+
+	if (!MOUNT_ROOT_RDONLY)
+		root_mountflags &= ~MS_RDONLY;
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code = (unsigned long) _etext;
+	init_mm.end_data = (unsigned long) _edata;
+	init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
+
+	code_resource.start = virt_to_phys(_text);
+	code_resource.end = virt_to_phys(_etext)-1;
+	data_resource.start = virt_to_phys(_etext);
+	data_resource.end = virt_to_phys(_edata)-1;
+
+	parse_cmdline_early(cmdline_p);
+
+	max_low_pfn = setup_memory();
+
+	/*
+	 * NOTE: before this point _nobody_ is allowed to allocate
+	 * any memory using the bootmem allocator.  Although the
+	 * alloctor is now initialised only the first 8Mb of the kernel
+	 * virtual address space has been mapped.  All allocations before
+	 * paging_init() has completed must use the alloc_bootmem_low_pages()
+	 * variant (which allocates DMA'able memory) and care must be taken
+	 * not to exceed the 8Mb limit.
+	 */
+
+#ifdef CONFIG_SMP
+	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
+#endif
+	paging_init();
+	remapped_pgdat_init();
+	zone_sizes_init();
+
+	/*
+	 * NOTE: at this point the bootmem allocator is fully available.
+	 */
+
+#ifdef CONFIG_EARLY_PRINTK
+	{
+		char *s = strstr(*cmdline_p, "earlyprintk=");
+		if (s) {
+			extern void setup_early_printk(char *);
+
+			setup_early_printk(s);
+			printk("early console enabled\n");
+		}
+	}
+#endif
+
+
+	dmi_scan_machine();
+
+#ifdef CONFIG_X86_GENERICARCH
+	generic_apic_probe(*cmdline_p);
+#endif	
+	if (efi_enabled)
+		efi_map_memmap();
+
+	/*
+	 * Parse the ACPI tables for possible boot-time SMP configuration.
+	 */
+	acpi_boot_table_init();
+	acpi_boot_init();
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (smp_found_config)
+		get_smp_config();
+#endif
+
+	register_memory();
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+		conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+}
+
+#include "setup_arch_post.h"
+/*
+ * Local Variables:
+ * mode:c
+ * c-file-style:"k&r"
+ * c-basic-offset:8
+ * End:
+ */
diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h
new file mode 100644
index 00000000000..d21b14f5c25
--- /dev/null
+++ b/arch/i386/kernel/sigframe.h
@@ -0,0 +1,21 @@
+struct sigframe
+{
+	char *pretcode;
+	int sig;
+	struct sigcontext sc;
+	struct _fpstate fpstate;
+	unsigned long extramask[_NSIG_WORDS-1];
+	char retcode[8];
+};
+
+struct rt_sigframe
+{
+	char *pretcode;
+	int sig;
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	struct _fpstate fpstate;
+	char retcode[8];
+};
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
new file mode 100644
index 00000000000..ef3602e1c05
--- /dev/null
+++ b/arch/i386/kernel/signal.c
@@ -0,0 +1,665 @@
+/*
+ *  linux/arch/i386/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include "sigframe.h"
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	struct pt_regs * regs = (struct pt_regs *) &history0;
+	sigset_t saveset;
+
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	regs->eax = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(regs, &saveset))
+			return -EINTR;
+	}
+}
+
+asmlinkage int
+sys_rt_sigsuspend(struct pt_regs regs)
+{
+	sigset_t saveset, newset;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (regs.ecx != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset)))
+		return -EFAULT;
+	sigdelsetmask(&newset, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	current->blocked = newset;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	regs.eax = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(&regs, &saveset))
+			return -EINTR;
+	}
+}
+
+asmlinkage int 
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+asmlinkage int
+sys_sigaltstack(unsigned long ebx)
+{
+	/* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
+	struct pt_regs *regs = (struct pt_regs *)&ebx;
+	const stack_t __user *uss = (const stack_t __user *)ebx;
+	stack_t __user *uoss = (stack_t __user *)regs->ecx;
+
+	return do_sigaltstack(uss, uoss, regs->esp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
+{
+	unsigned int err = 0;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+
+#define COPY_SEG(seg)							\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  regs->x##seg = tmp; }
+
+#define COPY_SEG_STRICT(seg)						\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  regs->x##seg = tmp|3; }
+
+#define GET_SEG(seg)							\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  loadsegment(seg,tmp); }
+
+#define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
+			 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
+			 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
+
+	GET_SEG(gs);
+	GET_SEG(fs);
+	COPY_SEG(es);
+	COPY_SEG(ds);
+	COPY(edi);
+	COPY(esi);
+	COPY(ebp);
+	COPY(esp);
+	COPY(ebx);
+	COPY(edx);
+	COPY(ecx);
+	COPY(eip);
+	COPY_SEG_STRICT(cs);
+	COPY_SEG_STRICT(ss);
+	
+	{
+		unsigned int tmpflags;
+		err |= __get_user(tmpflags, &sc->eflags);
+		regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+		regs->orig_eax = -1;		/* disable syscall checks */
+	}
+
+	{
+		struct _fpstate __user * buf;
+		err |= __get_user(buf, &sc->fpstate);
+		if (buf) {
+			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
+				goto badframe;
+			err |= restore_i387(buf);
+		} else {
+			struct task_struct *me = current;
+			if (used_math()) {
+				clear_fpu(me);
+				clear_used_math();
+			}
+		}
+	}
+
+	err |= __get_user(*peax, &sc->eax);
+	return err;
+
+badframe:
+	return 1;
+}
+
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *) &__unused;
+	struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
+	sigset_t set;
+	int eax;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask)
+	    || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	
+	if (restore_sigcontext(regs, &frame->sc, &eax))
+		goto badframe;
+	return eax;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}	
+
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *) &__unused;
+	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
+	sigset_t set;
+	int eax;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
+		goto badframe;
+
+	return eax;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}	
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int tmp, err = 0;
+
+	tmp = 0;
+	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
+
+	err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
+	err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
+	err |= __put_user(regs->edi, &sc->edi);
+	err |= __put_user(regs->esi, &sc->esi);
+	err |= __put_user(regs->ebp, &sc->ebp);
+	err |= __put_user(regs->esp, &sc->esp);
+	err |= __put_user(regs->ebx, &sc->ebx);
+	err |= __put_user(regs->edx, &sc->edx);
+	err |= __put_user(regs->ecx, &sc->ecx);
+	err |= __put_user(regs->eax, &sc->eax);
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
+	err |= __put_user(regs->eip, &sc->eip);
+	err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
+	err |= __put_user(regs->eflags, &sc->eflags);
+	err |= __put_user(regs->esp, &sc->esp_at_signal);
+	err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
+
+	tmp = save_i387(fpstate);
+	if (tmp < 0)
+	  err = 1;
+	else
+	  err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long esp;
+
+	/* Default to using normal stack */
+	esp = regs->esp;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(esp) == 0)
+			esp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	/* This is the legacy signal stack switching. */
+	else if ((regs->xss & 0xffff) != __USER_DS &&
+		 !(ka->sa.sa_flags & SA_RESTORER) &&
+		 ka->sa.sa_restorer) {
+		esp = (unsigned long) ka->sa.sa_restorer;
+	}
+
+	return (void __user *)((esp - frame_size) & -8ul);
+}
+
+/* These symbols are defined with the addresses in the vsyscall page.
+   See vsyscall-sigreturn.S.  */
+extern void __user __kernel_sigreturn;
+extern void __user __kernel_rt_sigreturn;
+
+static void setup_frame(int sig, struct k_sigaction *ka,
+			sigset_t *set, struct pt_regs * regs)
+{
+	void __user *restorer;
+	struct sigframe __user *frame;
+	int err = 0;
+	int usig;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	err = __put_user(usig, &frame->sig);
+	if (err)
+		goto give_sigsegv;
+
+	err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+	if (err)
+		goto give_sigsegv;
+
+	if (_NSIG_WORDS > 1) {
+		err = __copy_to_user(&frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask));
+		if (err)
+			goto give_sigsegv;
+	}
+
+	restorer = &__kernel_sigreturn;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+
+	/* Set up to return from userspace.  */
+	err |= __put_user(restorer, &frame->pretcode);
+	 
+	/*
+	 * This is popl %eax ; movl $,%eax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
+	err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
+	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+	regs->eax = (unsigned long) sig;
+	regs->edx = (unsigned long) 0;
+	regs->ecx = (unsigned long) 0;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->eflags &= ~TF_MASK;
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			   sigset_t *set, struct pt_regs * regs)
+{
+	void __user *restorer;
+	struct rt_sigframe __user *frame;
+	int err = 0;
+	int usig;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	err |= __put_user(usig, &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+	if (err)
+		goto give_sigsegv;
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->esp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+			        regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace.  */
+	restorer = &__kernel_rt_sigreturn;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+	err |= __put_user(restorer, &frame->pretcode);
+	 
+	/*
+	 * This is movl $,%eax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
+	err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
+	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+	regs->eax = (unsigned long) usig;
+	regs->edx = (unsigned long) &frame->info;
+	regs->ecx = (unsigned long) &frame->uc;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->eflags &= ~TF_MASK;
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+
+static void
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+	      sigset_t *oldset,	struct pt_regs * regs)
+{
+	/* Are we from a system call? */
+	if (regs->orig_eax >= 0) {
+		/* If so, check system call restarting.. */
+		switch (regs->eax) {
+		        case -ERESTART_RESTARTBLOCK:
+			case -ERESTARTNOHAND:
+				regs->eax = -EINTR;
+				break;
+
+			case -ERESTARTSYS:
+				if (!(ka->sa.sa_flags & SA_RESTART)) {
+					regs->eax = -EINTR;
+					break;
+				}
+			/* fallthrough */
+			case -ERESTARTNOINTR:
+				regs->eax = regs->orig_eax;
+				regs->eip -= 2;
+		}
+	}
+
+	/*
+	 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
+	 * that register information in the sigcontext is correct.
+	 */
+	if (unlikely(regs->eflags & TF_MASK)
+	    && likely(current->ptrace & PT_DTRACE)) {
+		current->ptrace &= ~PT_DTRACE;
+		regs->eflags &= ~TF_MASK;
+	}
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+		setup_frame(sig, ka, oldset, regs);
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigaddset(&current->blocked,sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+	siginfo_t info;
+	int signr;
+	struct k_sigaction ka;
+
+	/*
+	 * We want the common case to go fast, which
+	 * is why we may in certain cases get here from
+	 * kernel mode. Just return without doing anything
+	 * if so.
+	 */
+	if ((regs->xcs & 3) != 3)
+		return 1;
+
+	if (current->flags & PF_FREEZE) {
+		refrigerator(0);
+		goto no_signal;
+	}
+
+	if (!oldset)
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		/* Reenable any watchpoints before delivering the
+		 * signal to user space. The processor register will
+		 * have been cleared if the watchpoint triggered
+		 * inside the kernel.
+		 */
+		if (unlikely(current->thread.debugreg[7])) {
+			__asm__("movl %0,%%db7"	: : "r" (current->thread.debugreg[7]));
+		}
+
+		/* Whee!  Actually deliver the signal.  */
+		handle_signal(signr, &info, &ka, oldset, regs);
+		return 1;
+	}
+
+ no_signal:
+	/* Did we come from a system call? */
+	if (regs->orig_eax >= 0) {
+		/* Restart the system call - no handlers present */
+		if (regs->eax == -ERESTARTNOHAND ||
+		    regs->eax == -ERESTARTSYS ||
+		    regs->eax == -ERESTARTNOINTR) {
+			regs->eax = regs->orig_eax;
+			regs->eip -= 2;
+		}
+		if (regs->eax == -ERESTART_RESTARTBLOCK){
+			regs->eax = __NR_restart_syscall;
+			regs->eip -= 2;
+		}
+	}
+	return 0;
+}
+
+/*
+ * notification of userspace execution resumption
+ * - triggered by current->work.notify_resume
+ */
+__attribute__((regparm(3)))
+void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
+		      __u32 thread_info_flags)
+{
+	/* Pending single-step? */
+	if (thread_info_flags & _TIF_SINGLESTEP) {
+		regs->eflags |= TF_MASK;
+		clear_thread_flag(TIF_SINGLESTEP);
+	}
+	/* deal with pending signal delivery */
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs,oldset);
+	
+	clear_thread_flag(TIF_IRET);
+}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
new file mode 100644
index 00000000000..6223c33ac91
--- /dev/null
+++ b/arch/i386/kernel/smp.c
@@ -0,0 +1,612 @@
+/*
+ *	Intel SMP support routines.
+ *
+ *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *	This code is released under the GNU General Public License version 2 or
+ *	later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <mach_apic.h>
+
+/*
+ *	Some notes on x86 processor bugs affecting SMP operation:
+ *
+ *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ *	The Linux implications for SMP are handled as follows:
+ *
+ *	Pentium III / [Xeon]
+ *		None of the E1AP-E3AP errata are visible to the user.
+ *
+ *	E1AP.	see PII A1AP
+ *	E2AP.	see PII A2AP
+ *	E3AP.	see PII A3AP
+ *
+ *	Pentium II / [Xeon]
+ *		None of the A1AP-A3AP errata are visible to the user.
+ *
+ *	A1AP.	see PPro 1AP
+ *	A2AP.	see PPro 2AP
+ *	A3AP.	see PPro 7AP
+ *
+ *	Pentium Pro
+ *		None of 1AP-9AP errata are visible to the normal user,
+ *	except occasional delivery of 'spurious interrupt' as trap #15.
+ *	This is very rare and a non-problem.
+ *
+ *	1AP.	Linux maps APIC as non-cacheable
+ *	2AP.	worked around in hardware
+ *	3AP.	fixed in C0 and above steppings microcode update.
+ *		Linux does not use excessive STARTUP_IPIs.
+ *	4AP.	worked around in hardware
+ *	5AP.	symmetric IO mode (normal Linux operation) not affected.
+ *		'noapic' mode has vector 0xf filled out properly.
+ *	6AP.	'noapic' mode might be affected - fixed in later steppings
+ *	7AP.	We do not assume writes to the LVT deassering IRQs
+ *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
+ *	9AP.	We do not use mixed mode
+ *
+ *	Pentium
+ *		There is a marginal case where REP MOVS on 100MHz SMP
+ *	machines with B stepping processors can fail. XXX should provide
+ *	an L1cache=Writethrough or L1cache=off option.
+ *
+ *		B stepping CPUs may hang. There are hardware work arounds
+ *	for this. We warn about it in case your board doesn't have the work
+ *	arounds. Basically thats so I can tell anyone with a B stepping
+ *	CPU and SMP problems "tough".
+ *
+ *	Specific items [From Pentium Processor Specification Update]
+ *
+ *	1AP.	Linux doesn't use remote read
+ *	2AP.	Linux doesn't trust APIC errors
+ *	3AP.	We work around this
+ *	4AP.	Linux never generated 3 interrupts of the same priority
+ *		to cause a lost local interrupt.
+ *	5AP.	Remote read is never used
+ *	6AP.	not affected - worked around in hardware
+ *	7AP.	not affected - worked around in hardware
+ *	8AP.	worked around in hardware - we get explicit CS errors if not
+ *	9AP.	only 'noapic' mode affected. Might generate spurious
+ *		interrupts, we log only the first one and count the
+ *		rest silently.
+ *	10AP.	not affected - worked around in hardware
+ *	11AP.	Linux reads the APIC between writes to avoid this, as per
+ *		the documentation. Make sure you preserve this as it affects
+ *		the C stepping chips too.
+ *	12AP.	not affected - worked around in hardware
+ *	13AP.	not affected - worked around in hardware
+ *	14AP.	we always deassert INIT during bootup
+ *	15AP.	not affected - worked around in hardware
+ *	16AP.	not affected - worked around in hardware
+ *	17AP.	not affected - worked around in hardware
+ *	18AP.	not affected - worked around in hardware
+ *	19AP.	not affected - worked around in BIOS
+ *
+ *	If this sounds worrying believe me these bugs are either ___RARE___,
+ *	or are signal timing bugs worked around in hardware and there's
+ *	about nothing of note with C stepping upwards.
+ */
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+	return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+	return SET_APIC_DEST_FIELD(mask);
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+	/*
+	 * Subtle. In the case of the 'never do double writes' workaround
+	 * we have to lock out interrupts to be safe.  As we don't care
+	 * of the value read we use an atomic rmw access to avoid costly
+	 * cli/sti.  Otherwise we use an even cheaper single atomic write
+	 * to the APIC.
+	 */
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+
+	/*
+	 * No need to touch the target chip field
+	 */
+	cfg = __prepare_ICR(shortcut, vector);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+}
+
+void fastcall send_IPI_self(int vector)
+{
+	__send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long cfg;
+	unsigned long flags;
+
+	local_irq_save(flags);
+		
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+		
+	/*
+	 * prepare target chip field
+	 */
+	cfg = __prepare_ICR2(mask);
+	apic_write_around(APIC_ICR2, cfg);
+		
+	/*
+	 * program the ICR 
+	 */
+	cfg = __prepare_ICR(0, vector);
+			
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+
+	local_irq_restore(flags);
+}
+
+void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+	unsigned long cfg, flags;
+	unsigned int query_cpu;
+
+	/*
+	 * Hack. The clustered APIC addressing mode doesn't allow us to send 
+	 * to an arbitrary mask, so I do a unicasts to each CPU instead. This 
+	 * should be modified to do 1 message per cluster ID - mbligh
+	 */ 
+
+	local_irq_save(flags);
+
+	for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+		if (cpu_isset(query_cpu, mask)) {
+		
+			/*
+			 * Wait for idle.
+			 */
+			apic_wait_icr_idle();
+		
+			/*
+			 * prepare target chip field
+			 */
+			cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
+			apic_write_around(APIC_ICR2, cfg);
+		
+			/*
+			 * program the ICR 
+			 */
+			cfg = __prepare_ICR(0, vector);
+			
+			/*
+			 * Send the IPI. The write to APIC_ICR fires this off.
+			 */
+			apic_write_around(APIC_ICR, cfg);
+		}
+	}
+	local_irq_restore(flags);
+}
+
+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+
+/*
+ *	Smarter SMP flushing macros. 
+ *		c/o Linus Torvalds.
+ *
+ *	These mean you can really definitely utterly forget about
+ *	writing to user space from interrupts. (Its not allowed anyway).
+ *
+ *	Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL	0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context, 
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+		BUG();
+	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+	load_cr3(swapper_pg_dir);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ * 	Stop ipi delivery for the old mm. This is not synchronized with
+ * 	the other cpus, but smp_invalidate_interrupt ignore flush ipis
+ * 	for the wrong mm, and in the worst case we perform a superflous
+ * 	tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *	was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * 	Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ * 	Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *	flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * 	Atomically set the bit [other cpus will start sending flush ipis],
+ * 	and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
+{
+	unsigned long cpu;
+
+	cpu = get_cpu();
+
+	if (!cpu_isset(cpu, flush_cpumask))
+		goto out;
+		/* 
+		 * This was a BUG() but until someone can quote me the
+		 * line from the intel manual that guarantees an IPI to
+		 * multiple CPUs is retried _only_ on the erroring CPUs
+		 * its staying as a return
+		 *
+		 * BUG();
+		 */
+		 
+	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+			if (flush_va == FLUSH_ALL)
+				local_flush_tlb();
+			else
+				__flush_tlb_one(flush_va);
+		} else
+			leave_mm(cpu);
+	}
+	ack_APIC_irq();
+	smp_mb__before_clear_bit();
+	cpu_clear(cpu, flush_cpumask);
+	smp_mb__after_clear_bit();
+out:
+	put_cpu_no_resched();
+}
+
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+						unsigned long va)
+{
+	cpumask_t tmp;
+	/*
+	 * A couple of (to be removed) sanity checks:
+	 *
+	 * - we do not send IPIs to not-yet booted CPUs.
+	 * - current CPU must not be in mask
+	 * - mask must exist :)
+	 */
+	BUG_ON(cpus_empty(cpumask));
+
+	cpus_and(tmp, cpumask, cpu_online_map);
+	BUG_ON(!cpus_equal(cpumask, tmp));
+	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(!mm);
+
+	/*
+	 * i'm not happy about this global shared spinlock in the
+	 * MM hot path, but we'll see how contended it is.
+	 * Temporarily this turns IRQs off, so that lockups are
+	 * detected by the NMI watchdog.
+	 */
+	spin_lock(&tlbstate_lock);
+	
+	flush_mm = mm;
+	flush_va = va;
+#if NR_CPUS <= BITS_PER_LONG
+	atomic_set_mask(cpumask, &flush_cpumask);
+#else
+	{
+		int k;
+		unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+		unsigned long *cpu_mask = (unsigned long *)&cpumask;
+		for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+			atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+	}
+#endif
+	/*
+	 * We have to send the IPI only to
+	 * CPUs affected.
+	 */
+	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+	while (!cpus_empty(flush_cpumask))
+		/* nothing. lockup detection does not belong here */
+		mb();
+
+	flush_mm = NULL;
+	flush_va = 0;
+	spin_unlock(&tlbstate_lock);
+}
+	
+void flush_tlb_current_task(void)
+{
+	struct mm_struct *mm = current->mm;
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	local_flush_tlb();
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+	preempt_enable();
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	if (current->active_mm == mm) {
+		if (current->mm)
+			local_flush_tlb();
+		else
+			leave_mm(smp_processor_id());
+	}
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+	preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	if (current->active_mm == mm) {
+		if(current->mm)
+			__flush_tlb_one(va);
+		 else
+		 	leave_mm(smp_processor_id());
+	}
+
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, va);
+
+	preempt_enable();
+}
+
+static void do_flush_tlb_all(void* info)
+{
+	unsigned long cpu = smp_processor_id();
+
+	__flush_tlb_all();
+	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+		leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+	void (*func) (void *info);
+	void *info;
+	atomic_t started;
+	atomic_t finished;
+	int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+			int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or are or have executed.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+{
+	struct call_data_struct data;
+	int cpus = num_online_cpus()-1;
+
+	if (!cpus)
+		return 0;
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	spin_lock(&call_lock);
+	call_data = &data;
+	mb();
+	
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	spin_unlock(&call_lock);
+
+	return 0;
+}
+
+static void stop_this_cpu (void * dummy)
+{
+	/*
+	 * Remove this CPU:
+	 */
+	cpu_clear(smp_processor_id(), cpu_online_map);
+	local_irq_disable();
+	disable_local_APIC();
+	if (cpu_data[smp_processor_id()].hlt_works_ok)
+		for(;;) __asm__("hlt");
+	for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+	smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+	local_irq_disable();
+	disable_local_APIC();
+	local_irq_enable();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+}
+
+fastcall void smp_call_function_interrupt(struct pt_regs *regs)
+{
+	void (*func) (void *info) = call_data->func;
+	void *info = call_data->info;
+	int wait = call_data->wait;
+
+	ack_APIC_irq();
+	/*
+	 * Notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(&call_data->started);
+	/*
+	 * At this point the info structure may be out of scope unless wait==1
+	 */
+	irq_enter();
+	(*func)(info);
+	irq_exit();
+
+	if (wait) {
+		mb();
+		atomic_inc(&call_data->finished);
+	}
+}
+
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
new file mode 100644
index 00000000000..332ee7a1d1a
--- /dev/null
+++ b/arch/i386/kernel/smpboot.c
@@ -0,0 +1,1145 @@
+/*
+ *	x86 SMP booting functions
+ *
+ *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *	Much of the core SMP work is based on previous work by Thomas Radke, to
+ *	whom a great many thanks are extended.
+ *
+ *	Thanks to Intel for making available several different Pentium,
+ *	Pentium Pro and Pentium-II/Xeon MP machines.
+ *	Original development of Linux SMP code supported by Caldera.
+ *
+ *	This code is released under the GNU General Public License version 2 or
+ *	later.
+ *
+ *	Fixes
+ *		Felix Koop	:	NR_CPUS used properly
+ *		Jose Renau	:	Handle single CPU case.
+ *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
+ *		Greg Wright	:	Fix for kernel stacks panic.
+ *		Erich Boleyn	:	MP v1.4 and additional changes.
+ *	Matthias Sattler	:	Changes for 2.1 kernel map.
+ *	Michel Lespinasse	:	Changes for 2.1 kernel map.
+ *	Michael Chastain	:	Change trampoline.S to gnu as.
+ *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
+ *		Ingo Molnar	:	Added APIC timers, based on code
+ *					from Jose Renau
+ *		Ingo Molnar	:	various cleanups and rewrites
+ *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
+ *		Martin J. Bligh	: 	Added support for multi-quad systems
+ *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
+*		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+
+#include <mach_apic.h>
+#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
+
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
+
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+u8 x86_cpu_to_apicid[NR_CPUS] =
+			{ [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end  [];
+static unsigned char *trampoline_base;
+static int trampoline_exec;
+
+static void map_cpu_to_logical_apicid(void);
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+	return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+	/*
+	 * Has to be in very low memory so we can execute
+	 * real-mode AP code.
+	 */
+	if (__pa(trampoline_base) >= 0x9F000)
+		BUG();
+	/*
+	 * Make the SMP trampoline executable:
+	 */
+	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+	struct cpuinfo_x86 *c = cpu_data + id;
+
+	*c = boot_cpu_data;
+	if (id!=0)
+		identify_cpu(c);
+	/*
+	 * Mask B, Pentium, but not Pentium MMX
+	 */
+	if (c->x86_vendor == X86_VENDOR_INTEL &&
+	    c->x86 == 5 &&
+	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
+	    c->x86_model <= 3)
+		/*
+		 * Remember we have B step Pentia with bugs
+		 */
+		smp_b_stepping = 1;
+
+	/*
+	 * Certain Athlons might work (for various values of 'work') in SMP
+	 * but they are not certified as MP capable.
+	 */
+	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+		/* Athlon 660/661 is valid. */	
+		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+			goto valid_k7;
+
+		/* Duron 670 is valid */
+		if ((c->x86_model==7) && (c->x86_mask==0))
+			goto valid_k7;
+
+		/*
+		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
+		 * have the MP bit set.
+		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+		 */
+		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
+		     (c->x86_model> 7))
+			if (cpu_has_mp)
+				goto valid_k7;
+
+		/* If we get here, it's not a certified SMP capable AMD system. */
+		tainted |= TAINT_UNSAFE_SMP;
+	}
+
+valid_k7:
+	;
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+static void __init synchronize_tsc_bp (void)
+{
+	int i;
+	unsigned long long t0;
+	unsigned long long sum, avg;
+	long long delta;
+	unsigned long one_usec;
+	int buggy = 0;
+
+	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+	/* convert from kcyc/sec to cyc/usec */
+	one_usec = cpu_khz / 1000;
+
+	atomic_set(&tsc_start_flag, 1);
+	wmb();
+
+	/*
+	 * We loop a few times to get a primed instruction cache,
+	 * then the last pass is more or less synchronized and
+	 * the BP and APs set their cycle counters to zero all at
+	 * once. This reduces the chance of having random offsets
+	 * between the processors, and guarantees that the maximum
+	 * delay between the cycle counters is never bigger than
+	 * the latency of information-passing (cachelines) between
+	 * two CPUs.
+	 */
+	for (i = 0; i < NR_LOOPS; i++) {
+		/*
+		 * all APs synchronize but they loop on '== num_cpus'
+		 */
+		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+			mb();
+		atomic_set(&tsc_count_stop, 0);
+		wmb();
+		/*
+		 * this lets the APs save their current TSC:
+		 */
+		atomic_inc(&tsc_count_start);
+
+		rdtscll(tsc_values[smp_processor_id()]);
+		/*
+		 * We clear the TSC in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			write_tsc(0, 0);
+
+		/*
+		 * Wait for all APs to leave the synchronization point:
+		 */
+		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+			mb();
+		atomic_set(&tsc_count_start, 0);
+		wmb();
+		atomic_inc(&tsc_count_stop);
+	}
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_isset(i, cpu_callout_map)) {
+			t0 = tsc_values[i];
+			sum += t0;
+		}
+	}
+	avg = sum;
+	do_div(avg, num_booting_cpus());
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_isset(i, cpu_callout_map))
+			continue;
+		delta = tsc_values[i] - avg;
+		if (delta < 0)
+			delta = -delta;
+		/*
+		 * We report bigger than 2 microseconds clock differences.
+		 */
+		if (delta > 2*one_usec) {
+			long realdelta;
+			if (!buggy) {
+				buggy = 1;
+				printk("\n");
+			}
+			realdelta = delta;
+			do_div(realdelta, one_usec);
+			if (tsc_values[i] < avg)
+				realdelta = -realdelta;
+
+			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+		}
+
+		sum += delta;
+	}
+	if (!buggy)
+		printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+	int i;
+
+	/*
+	 * Not every cpu is online at the time
+	 * this gets called, so we first wait for the BP to
+	 * finish SMP initialization:
+	 */
+	while (!atomic_read(&tsc_start_flag)) mb();
+
+	for (i = 0; i < NR_LOOPS; i++) {
+		atomic_inc(&tsc_count_start);
+		while (atomic_read(&tsc_count_start) != num_booting_cpus())
+			mb();
+
+		rdtscll(tsc_values[smp_processor_id()]);
+		if (i == NR_LOOPS-1)
+			write_tsc(0, 0);
+
+		atomic_inc(&tsc_count_stop);
+		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+	}
+}
+#undef NR_LOOPS
+
+extern void calibrate_delay(void);
+
+static atomic_t init_deasserted;
+
+static void __init smp_callin(void)
+{
+	int cpuid, phys_id;
+	unsigned long timeout;
+
+	/*
+	 * If waken up by an INIT in an 82489DX configuration
+	 * we may get here before an INIT-deassert IPI reaches
+	 * our local APIC.  We have to wait for the IPI or we'll
+	 * lock up on an APIC access.
+	 */
+	wait_for_init_deassert(&init_deasserted);
+
+	/*
+	 * (This works even if the APIC is not enabled.)
+	 */
+	phys_id = GET_APIC_ID(apic_read(APIC_ID));
+	cpuid = smp_processor_id();
+	if (cpu_isset(cpuid, cpu_callin_map)) {
+		printk("huh, phys CPU#%d, CPU#%d already present??\n",
+					phys_id, cpuid);
+		BUG();
+	}
+	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+	/*
+	 * STARTUP IPIs are fragile beasts as they might sometimes
+	 * trigger some glue motherboard logic. Complete APIC bus
+	 * silence for 1 second, this overestimates the time the
+	 * boot CPU is spending to send the up to 2 STARTUP IPIs
+	 * by a factor of two. This should be enough.
+	 */
+
+	/*
+	 * Waiting 2s total for startup (udelay is not yet working)
+	 */
+	timeout = jiffies + 2*HZ;
+	while (time_before(jiffies, timeout)) {
+		/*
+		 * Has the boot CPU finished it's STARTUP sequence?
+		 */
+		if (cpu_isset(cpuid, cpu_callout_map))
+			break;
+		rep_nop();
+	}
+
+	if (!time_before(jiffies, timeout)) {
+		printk("BUG: CPU%d started up but did not get a callout!\n",
+			cpuid);
+		BUG();
+	}
+
+	/*
+	 * the boot CPU has finished the init stage and is spinning
+	 * on callin_map until we finish. We are free to set up this
+	 * CPU, first the APIC. (this is probably redundant on most
+	 * boards)
+	 */
+
+	Dprintk("CALLIN, before setup_local_APIC().\n");
+	smp_callin_clear_local_apic();
+	setup_local_APIC();
+	map_cpu_to_logical_apicid();
+
+	/*
+	 * Get our bogomips.
+	 */
+	calibrate_delay();
+	Dprintk("Stack at about %p\n",&cpuid);
+
+	/*
+	 * Save our processor parameters
+	 */
+ 	smp_store_cpu_info(cpuid);
+
+	disable_APIC_timer();
+
+	/*
+	 * Allow the master to continue.
+	 */
+	cpu_set(cpuid, cpu_callin_map);
+
+	/*
+	 *      Synchronize the TSC with the BP
+	 */
+	if (cpu_has_tsc && cpu_khz)
+		synchronize_tsc_ap();
+}
+
+static int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+static void __init start_secondary(void *unused)
+{
+	/*
+	 * Dont put anything before smp_callin(), SMP
+	 * booting is too fragile that we want to limit the
+	 * things done here to the most necessary things.
+	 */
+	cpu_init();
+	smp_callin();
+	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+		rep_nop();
+	setup_secondary_APIC_clock();
+	if (nmi_watchdog == NMI_IO_APIC) {
+		disable_8259A_irq(0);
+		enable_NMI_through_LVT0(NULL);
+		enable_8259A_irq(0);
+	}
+	enable_APIC_timer();
+	/*
+	 * low-memory mappings have been cleared, flush them from
+	 * the local TLBs too.
+	 */
+	local_flush_tlb();
+	cpu_set(smp_processor_id(), cpu_online_map);
+
+	/* We can take interrupts now: we're officially "up". */
+	local_irq_enable();
+
+	wmb();
+	cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+	/*
+	 * We don't actually need to load the full TSS,
+	 * basically just the stack pointer and the eip.
+	 */
+
+	asm volatile(
+		"movl %0,%%esp\n\t"
+		"jmp *%1"
+		:
+		:"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+	void * esp;
+	unsigned short ss;
+} stack_start;
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
+
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+	printk("Mapping cpu %d to node %d\n", cpu, node);
+	cpu_set(cpu, node_2_cpu_mask[node]);
+	cpu_2_node[cpu] = node;
+}
+
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
+{
+	int node;
+
+	printk("Unmapping cpu %d from all nodes\n", cpu);
+	for (node = 0; node < MAX_NUMNODES; node ++)
+		cpu_clear(cpu, node_2_cpu_mask[node]);
+	cpu_2_node[cpu] = 0;
+}
+#else /* !CONFIG_NUMA */
+
+#define map_cpu_to_node(cpu, node)	({})
+#define unmap_cpu_to_node(cpu)	({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+static void map_cpu_to_logical_apicid(void)
+{
+	int cpu = smp_processor_id();
+	int apicid = logical_smp_processor_id();
+
+	cpu_2_logical_apicid[cpu] = apicid;
+	map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+static void unmap_cpu_to_logical_apicid(int cpu)
+{
+	cpu_2_logical_apicid[cpu] = BAD_APICID;
+	unmap_cpu_to_node(cpu);
+}
+
+#if APIC_DEBUG
+static inline void __inquire_remote_apic(int apicid)
+{
+	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+	char *names[] = { "ID", "VERSION", "SPIV" };
+	int timeout, status;
+
+	printk("Inquiring remote APIC #%d...\n", apicid);
+
+	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+		printk("... APIC #%d %s: ", apicid, names[i]);
+
+		/*
+		 * Wait for idle.
+		 */
+		apic_wait_icr_idle();
+
+		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+		timeout = 0;
+		do {
+			udelay(100);
+			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+		switch (status) {
+		case APIC_ICR_RR_VALID:
+			status = apic_read(APIC_RRR);
+			printk("%08x\n", status);
+			break;
+		default:
+			printk("failed\n");
+		}
+	}
+}
+#endif
+
+#ifdef WAKE_SECONDARY_VIA_NMI
+/* 
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+	unsigned long send_status = 0, accept_status = 0;
+	int timeout, maxlvt;
+
+	/* Target chip */
+	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+	/* Boot on the stack */
+	/* Kick the second */
+	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+	Dprintk("Waiting for send to finish...\n");
+	timeout = 0;
+	do {
+		Dprintk("+");
+		udelay(100);
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+	} while (send_status && (timeout++ < 1000));
+
+	/*
+	 * Give the other CPU some time to accept the IPI.
+	 */
+	udelay(200);
+	/*
+	 * Due to the Pentium erratum 3AP.
+	 */
+	maxlvt = get_maxlvt();
+	if (maxlvt > 3) {
+		apic_read_around(APIC_SPIV);
+		apic_write(APIC_ESR, 0);
+	}
+	accept_status = (apic_read(APIC_ESR) & 0xEF);
+	Dprintk("NMI sent.\n");
+
+	if (send_status)
+		printk("APIC never delivered???\n");
+	if (accept_status)
+		printk("APIC delivery error (%lx).\n", accept_status);
+
+	return (send_status | accept_status);
+}
+#endif	/* WAKE_SECONDARY_VIA_NMI */
+
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+	unsigned long send_status = 0, accept_status = 0;
+	int maxlvt, timeout, num_starts, j;
+
+	/*
+	 * Be paranoid about clearing APIC errors.
+	 */
+	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+		apic_read_around(APIC_SPIV);
+		apic_write(APIC_ESR, 0);
+		apic_read(APIC_ESR);
+	}
+
+	Dprintk("Asserting INIT.\n");
+
+	/*
+	 * Turn INIT on target chip
+	 */
+	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+	/*
+	 * Send IPI
+	 */
+	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+				| APIC_DM_INIT);
+
+	Dprintk("Waiting for send to finish...\n");
+	timeout = 0;
+	do {
+		Dprintk("+");
+		udelay(100);
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+	} while (send_status && (timeout++ < 1000));
+
+	mdelay(10);
+
+	Dprintk("Deasserting INIT.\n");
+
+	/* Target chip */
+	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+	/* Send IPI */
+	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+	Dprintk("Waiting for send to finish...\n");
+	timeout = 0;
+	do {
+		Dprintk("+");
+		udelay(100);
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+	} while (send_status && (timeout++ < 1000));
+
+	atomic_set(&init_deasserted, 1);
+
+	/*
+	 * Should we send STARTUP IPIs ?
+	 *
+	 * Determine this based on the APIC version.
+	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+	 */
+	if (APIC_INTEGRATED(apic_version[phys_apicid]))
+		num_starts = 2;
+	else
+		num_starts = 0;
+
+	/*
+	 * Run STARTUP IPI loop.
+	 */
+	Dprintk("#startup loops: %d.\n", num_starts);
+
+	maxlvt = get_maxlvt();
+
+	for (j = 1; j <= num_starts; j++) {
+		Dprintk("Sending STARTUP #%d.\n",j);
+		apic_read_around(APIC_SPIV);
+		apic_write(APIC_ESR, 0);
+		apic_read(APIC_ESR);
+		Dprintk("After apic_write.\n");
+
+		/*
+		 * STARTUP IPI
+		 */
+
+		/* Target chip */
+		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+		/* Boot on the stack */
+		/* Kick the second */
+		apic_write_around(APIC_ICR, APIC_DM_STARTUP
+					| (start_eip >> 12));
+
+		/*
+		 * Give the other CPU some time to accept the IPI.
+		 */
+		udelay(300);
+
+		Dprintk("Startup point 1.\n");
+
+		Dprintk("Waiting for send to finish...\n");
+		timeout = 0;
+		do {
+			Dprintk("+");
+			udelay(100);
+			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+		} while (send_status && (timeout++ < 1000));
+
+		/*
+		 * Give the other CPU some time to accept the IPI.
+		 */
+		udelay(200);
+		/*
+		 * Due to the Pentium erratum 3AP.
+		 */
+		if (maxlvt > 3) {
+			apic_read_around(APIC_SPIV);
+			apic_write(APIC_ESR, 0);
+		}
+		accept_status = (apic_read(APIC_ESR) & 0xEF);
+		if (send_status || accept_status)
+			break;
+	}
+	Dprintk("After Startup.\n");
+
+	if (send_status)
+		printk("APIC never delivered???\n");
+	if (accept_status)
+		printk("APIC delivery error (%lx).\n", accept_status);
+
+	return (send_status | accept_status);
+}
+#endif	/* WAKE_SECONDARY_VIA_INIT */
+
+extern cpumask_t cpu_initialized;
+
+static int __init do_boot_cpu(int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ */
+{
+	struct task_struct *idle;
+	unsigned long boot_error;
+	int timeout, cpu;
+	unsigned long start_eip;
+	unsigned short nmi_high = 0, nmi_low = 0;
+
+	cpu = ++cpucount;
+	/*
+	 * We can't use kernel_thread since we must avoid to
+	 * reschedule the child.
+	 */
+	idle = fork_idle(cpu);
+	if (IS_ERR(idle))
+		panic("failed fork for CPU %d", cpu);
+	idle->thread.eip = (unsigned long) start_secondary;
+	/* start_eip had better be page-aligned! */
+	start_eip = setup_trampoline();
+
+	/* So we see what's up   */
+	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+	/* Stack for startup_32 can be just as for start_secondary onwards */
+	stack_start.esp = (void *) idle->thread.esp;
+
+	irq_ctx_init(cpu);
+
+	/*
+	 * This grunge runs the startup process for
+	 * the targeted processor.
+	 */
+
+	atomic_set(&init_deasserted, 0);
+
+	Dprintk("Setting warm reset code and vector.\n");
+
+	store_NMI_vector(&nmi_high, &nmi_low);
+
+	smpboot_setup_warm_reset_vector(start_eip);
+
+	/*
+	 * Starting actual IPI sequence...
+	 */
+	boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+	if (!boot_error) {
+		/*
+		 * allow APs to start initializing.
+		 */
+		Dprintk("Before Callout %d.\n", cpu);
+		cpu_set(cpu, cpu_callout_map);
+		Dprintk("After Callout %d.\n", cpu);
+
+		/*
+		 * Wait 5s total for a response
+		 */
+		for (timeout = 0; timeout < 50000; timeout++) {
+			if (cpu_isset(cpu, cpu_callin_map))
+				break;	/* It has booted */
+			udelay(100);
+		}
+
+		if (cpu_isset(cpu, cpu_callin_map)) {
+			/* number CPUs logically, starting from 1 (BSP is 0) */
+			Dprintk("OK.\n");
+			printk("CPU%d: ", cpu);
+			print_cpu_info(&cpu_data[cpu]);
+			Dprintk("CPU has booted.\n");
+		} else {
+			boot_error= 1;
+			if (*((volatile unsigned char *)trampoline_base)
+					== 0xA5)
+				/* trampoline started but...? */
+				printk("Stuck ??\n");
+			else
+				/* trampoline code not run */
+				printk("Not responding.\n");
+			inquire_remote_apic(apicid);
+		}
+	}
+	x86_cpu_to_apicid[cpu] = apicid;
+	if (boot_error) {
+		/* Try to put things back the way they were before ... */
+		unmap_cpu_to_logical_apicid(cpu);
+		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+		cpucount--;
+	}
+
+	/* mark "stuck" area as not stuck */
+	*((volatile unsigned long *)trampoline_base) = 0;
+
+	return boot_error;
+}
+
+static void smp_tune_scheduling (void)
+{
+	unsigned long cachesize;       /* kB   */
+	unsigned long bandwidth = 350; /* MB/s */
+	/*
+	 * Rough estimation for SMP scheduling, this is the number of
+	 * cycles it takes for a fully memory-limited process to flush
+	 * the SMP-local cache.
+	 *
+	 * (For a P5 this pretty much means we will choose another idle
+	 *  CPU almost always at wakeup time (this is due to the small
+	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
+	 *  the cache size)
+	 */
+
+	if (!cpu_khz) {
+		/*
+		 * this basically disables processor-affinity
+		 * scheduling on SMP without a TSC.
+		 */
+		return;
+	} else {
+		cachesize = boot_cpu_data.x86_cache_size;
+		if (cachesize == -1) {
+			cachesize = 16; /* Pentiums, 2x8kB cache */
+			bandwidth = 100;
+		}
+	}
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+	int apicid, cpu, bit, kicked;
+	unsigned long bogosum = 0;
+
+	/*
+	 * Setup boot CPU information
+	 */
+	smp_store_cpu_info(0); /* Final full version of the data */
+	printk("CPU%d: ", 0);
+	print_cpu_info(&cpu_data[0]);
+
+	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+	boot_cpu_logical_apicid = logical_smp_processor_id();
+	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+
+	current_thread_info()->cpu = 0;
+	smp_tune_scheduling();
+	cpus_clear(cpu_sibling_map[0]);
+	cpu_set(0, cpu_sibling_map[0]);
+
+	/*
+	 * If we couldn't find an SMP configuration at boot time,
+	 * get out of here now!
+	 */
+	if (!smp_found_config && !acpi_lapic) {
+		printk(KERN_NOTICE "SMP motherboard not detected.\n");
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		if (APIC_init_uniprocessor())
+			printk(KERN_NOTICE "Local APIC not detected."
+					   " Using dummy APIC emulation.\n");
+		map_cpu_to_logical_apicid();
+		return;
+	}
+
+	/*
+	 * Should not be necessary because the MP table should list the boot
+	 * CPU too, but we do it for the sake of robustness anyway.
+	 * Makes no sense to do this check in clustered apic mode, so skip it
+	 */
+	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+				boot_cpu_physical_apicid);
+		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+	}
+
+	/*
+	 * If we couldn't find a local APIC, then get out of here now!
+	 */
+	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+			boot_cpu_physical_apicid);
+		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		return;
+	}
+
+	verify_local_APIC();
+
+	/*
+	 * If SMP should be disabled, then really disable it!
+	 */
+	if (!max_cpus) {
+		smp_found_config = 0;
+		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		return;
+	}
+
+	connect_bsp_APIC();
+	setup_local_APIC();
+	map_cpu_to_logical_apicid();
+
+
+	setup_portio_remap();
+
+	/*
+	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+	 *
+	 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
+	 * clustered apic ID.
+	 */
+	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+
+	kicked = 1;
+	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+		apicid = cpu_present_to_apicid(bit);
+		/*
+		 * Don't even attempt to start the boot CPU!
+		 */
+		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
+			continue;
+
+		if (!check_apicid_present(bit))
+			continue;
+		if (max_cpus <= cpucount+1)
+			continue;
+
+		if (do_boot_cpu(apicid))
+			printk("CPU #%d not responding - cannot use it.\n",
+								apicid);
+		else
+			++kicked;
+	}
+
+	/*
+	 * Cleanup possible dangling ends...
+	 */
+	smpboot_restore_warm_reset_vector();
+
+	/*
+	 * Allow the user to impress friends.
+	 */
+	Dprintk("Before bogomips.\n");
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		if (cpu_isset(cpu, cpu_callout_map))
+			bogosum += cpu_data[cpu].loops_per_jiffy;
+	printk(KERN_INFO
+		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		cpucount+1,
+		bogosum/(500000/HZ),
+		(bogosum/(5000/HZ))%100);
+	
+	Dprintk("Before bogocount - setting activated=1.\n");
+
+	if (smp_b_stepping)
+		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+	/*
+	 * Don't taint if we are running SMP kernel on a single non-MP
+	 * approved Athlon
+	 */
+	if (tainted & TAINT_UNSAFE_SMP) {
+		if (cpucount)
+			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+		else
+			tainted &= ~TAINT_UNSAFE_SMP;
+	}
+
+	Dprintk("Boot done.\n");
+
+	/*
+	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
+	 * efficiently.
+	 */
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		cpus_clear(cpu_sibling_map[cpu]);
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		int siblings = 0;
+		int i;
+		if (!cpu_isset(cpu, cpu_callout_map))
+			continue;
+
+		if (smp_num_siblings > 1) {
+			for (i = 0; i < NR_CPUS; i++) {
+				if (!cpu_isset(i, cpu_callout_map))
+					continue;
+				if (phys_proc_id[cpu] == phys_proc_id[i]) {
+					siblings++;
+					cpu_set(i, cpu_sibling_map[cpu]);
+				}
+			}
+		} else {
+			siblings++;
+			cpu_set(cpu, cpu_sibling_map[cpu]);
+		}
+
+		if (siblings != smp_num_siblings)
+			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		check_nmi_watchdog();
+
+	smpboot_setup_io_apic();
+
+	setup_boot_APIC_clock();
+
+	/*
+	 * Synchronize the TSC with the AP
+	 */
+	if (cpu_has_tsc && cpucount && cpu_khz)
+		synchronize_tsc_bp();
+}
+
+/* These are wrappers to interface to the new boot process.  Someone
+   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+	cpu_set(smp_processor_id(), cpu_online_map);
+	cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+	/* This only works at boot for x86.  See "rewrite" above. */
+	if (cpu_isset(cpu, smp_commenced_mask)) {
+		local_irq_enable();
+		return -ENOSYS;
+	}
+
+	/* In case one didn't come up */
+	if (!cpu_isset(cpu, cpu_callin_map)) {
+		local_irq_enable();
+		return -EIO;
+	}
+
+	local_irq_enable();
+	/* Unleash the CPU! */
+	cpu_set(cpu, smp_commenced_mask);
+	while (!cpu_isset(cpu, cpu_online_map))
+		mb();
+	return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_IO_APIC
+	setup_ioapic_dest();
+#endif
+	zap_low_mappings();
+	/*
+	 * Disable executability of the SMP trampoline:
+	 */
+	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+}
+
+void __init smp_intr_init(void)
+{
+	/*
+	 * IRQ0 must be given a fixed assignment and initialized,
+	 * because it's used before the IO-APIC is set up.
+	 */
+	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+	/*
+	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+	 * IPI, driven by wakeup.
+	 */
+	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+	/* IPI for invalidation */
+	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+	/* IPI for generic function call */
+	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
new file mode 100644
index 00000000000..7b3b27d6440
--- /dev/null
+++ b/arch/i386/kernel/srat.c
@@ -0,0 +1,456 @@
+/*
+ * Some of the code in this file has been gleaned from the 64 bit 
+ * discontigmem support code base.
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to Pat Gaughen <gone@us.ibm.com>
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/acpi.h>
+#include <linux/nodemask.h>
+#include <asm/srat.h>
+#include <asm/topology.h>
+
+/*
+ * proximity macros and definitions
+ */
+#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
+#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
+#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
+#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
+#define MAX_PXM_DOMAINS		256	/* 1 byte and no promises about values */
+/* bitmap length; _PXM is at most 255 */
+#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
+static u8 pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
+
+#define MAX_CHUNKS_PER_NODE	4
+#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
+struct node_memory_chunk_s {
+	unsigned long	start_pfn;
+	unsigned long	end_pfn;
+	u8	pxm;		// proximity domain of node
+	u8	nid;		// which cnode contains this chunk?
+	u8	bank;		// which mem bank on this node
+};
+static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
+
+static int num_memory_chunks;		/* total number of memory chunks */
+static int zholes_size_init;
+static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
+
+extern void * boot_ioremap(unsigned long, unsigned long);
+
+/* Identify CPU proximity domains */
+static void __init parse_cpu_affinity_structure(char *p)
+{
+	struct acpi_table_processor_affinity *cpu_affinity = 
+				(struct acpi_table_processor_affinity *) p;
+
+	if (!cpu_affinity->flags.enabled)
+		return;		/* empty entry */
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain);
+
+	printk("CPU 0x%02X in proximity domain 0x%02X\n",
+		cpu_affinity->apic_id, cpu_affinity->proximity_domain);
+}
+
+/*
+ * Identify memory proximity domains and hot-remove capabilities.
+ * Fill node memory chunk list structure.
+ */
+static void __init parse_memory_affinity_structure (char *sratp)
+{
+	unsigned long long paddr, size;
+	unsigned long start_pfn, end_pfn; 
+	u8 pxm;
+	struct node_memory_chunk_s *p, *q, *pend;
+	struct acpi_table_memory_affinity *memory_affinity =
+			(struct acpi_table_memory_affinity *) sratp;
+
+	if (!memory_affinity->flags.enabled)
+		return;		/* empty entry */
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain);
+
+	/* calculate info for memory chunk structure */
+	paddr = memory_affinity->base_addr_hi;
+	paddr = (paddr << 32) | memory_affinity->base_addr_lo;
+	size = memory_affinity->length_hi;
+	size = (size << 32) | memory_affinity->length_lo;
+	
+	start_pfn = paddr >> PAGE_SHIFT;
+	end_pfn = (paddr + size) >> PAGE_SHIFT;
+	
+	pxm = memory_affinity->proximity_domain;
+
+	if (num_memory_chunks >= MAXCHUNKS) {
+		printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n",
+			size/(1024*1024), paddr);
+		return;
+	}
+
+	/* Insertion sort based on base address */
+	pend = &node_memory_chunk[num_memory_chunks];
+	for (p = &node_memory_chunk[0]; p < pend; p++) {
+		if (start_pfn < p->start_pfn)
+			break;
+	}
+	if (p < pend) {
+		for (q = pend; q >= p; q--)
+			*(q + 1) = *q;
+	}
+	p->start_pfn = start_pfn;
+	p->end_pfn = end_pfn;
+	p->pxm = pxm;
+
+	num_memory_chunks++;
+
+	printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n",
+		start_pfn, end_pfn,
+		memory_affinity->memory_type,
+		memory_affinity->proximity_domain,
+		(memory_affinity->flags.hot_pluggable ?
+		 "enabled and removable" : "enabled" ) );
+}
+
+#if MAX_NR_ZONES != 3
+#error "MAX_NR_ZONES != 3, chunk_to_zone requires review"
+#endif
+/* Take a chunk of pages from page frame cstart to cend and count the number
+ * of pages in each zone, returned via zones[].
+ */
+static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, 
+		unsigned long *zones)
+{
+	unsigned long max_dma;
+	extern unsigned long max_low_pfn;
+
+	int z;
+	unsigned long rend;
+
+	/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
+	 * similarly scoped information and should be handled in a consistant
+	 * manner.
+	 */
+	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+	/* Split the hole into the zones in which it falls.  Repeatedly
+	 * take the segment in which the remaining hole starts, round it
+	 * to the end of that zone.
+	 */
+	memset(zones, 0, MAX_NR_ZONES * sizeof(long));
+	while (cstart < cend) {
+		if (cstart < max_dma) {
+			z = ZONE_DMA;
+			rend = (cend < max_dma)? cend : max_dma;
+
+		} else if (cstart < max_low_pfn) {
+			z = ZONE_NORMAL;
+			rend = (cend < max_low_pfn)? cend : max_low_pfn;
+
+		} else {
+			z = ZONE_HIGHMEM;
+			rend = cend;
+		}
+		zones[z] += rend - cstart;
+		cstart = rend;
+	}
+}
+
+/*
+ * The SRAT table always lists ascending addresses, so can always
+ * assume that the first "start" address that you see is the real
+ * start of the node, and that the current "end" address is after
+ * the previous one.
+ */
+static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+{
+	/*
+	 * Only add present memory as told by the e820.
+	 * There is no guarantee from the SRAT that the memory it
+	 * enumerates is present at boot time because it represents
+	 * *possible* memory hotplug areas the same as normal RAM.
+	 */
+	if (memory_chunk->start_pfn >= max_pfn) {
+		printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n",
+			memory_chunk->start_pfn, memory_chunk->end_pfn);
+		return;
+	}
+	if (memory_chunk->nid != nid)
+		return;
+
+	if (!node_has_online_mem(nid))
+		node_start_pfn[nid] = memory_chunk->start_pfn;
+
+	if (node_start_pfn[nid] > memory_chunk->start_pfn)
+		node_start_pfn[nid] = memory_chunk->start_pfn;
+
+	if (node_end_pfn[nid] < memory_chunk->end_pfn)
+		node_end_pfn[nid] = memory_chunk->end_pfn;
+}
+
+/* Parse the ACPI Static Resource Affinity Table */
+static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
+{
+	u8 *start, *end, *p;
+	int i, j, nid;
+	u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
+	u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
+
+	start = (u8 *)(&(sratp->reserved) + 1);	/* skip header */
+	p = start;
+	end = (u8 *)sratp + sratp->header.length;
+
+	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
+	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
+	memset(zholes_size, 0, sizeof(zholes_size));
+
+	/* -1 in these maps means not available */
+	memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+	memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
+
+	num_memory_chunks = 0;
+	while (p < end) {
+		switch (*p) {
+		case ACPI_SRAT_PROCESSOR_AFFINITY:
+			parse_cpu_affinity_structure(p);
+			break;
+		case ACPI_SRAT_MEMORY_AFFINITY:
+			parse_memory_affinity_structure(p);
+			break;
+		default:
+			printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
+			break;
+		}
+		p += p[1];
+		if (p[1] == 0) {
+			printk("acpi20_parse_srat: Entry length value is zero;"
+				" can't parse any further!\n");
+			break;
+		}
+	}
+
+	if (num_memory_chunks == 0) {
+		printk("could not finy any ACPI SRAT memory areas.\n");
+		goto out_fail;
+	}
+
+	/* Calculate total number of nodes in system from PXM bitmap and create
+	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
+	 * to specify the range of _PXM values.)
+	 */
+	/*
+	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
+	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
+	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
+	 * approaches MAX_PXM_DOMAINS for i386.
+	 */
+	nodes_clear(node_online_map);
+	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+		if (BMAP_TEST(pxm_bitmap, i)) {
+			nid = num_online_nodes();
+			pxm_to_nid_map[i] = nid;
+			nid_to_pxm_map[nid] = i;
+			node_set_online(nid);
+		}
+	}
+	BUG_ON(num_online_nodes() == 0);
+
+	/* set cnode id in memory chunk structure */
+	for (i = 0; i < num_memory_chunks; i++)
+		node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
+
+	printk("pxm bitmap: ");
+	for (i = 0; i < sizeof(pxm_bitmap); i++) {
+		printk("%02X ", pxm_bitmap[i]);
+	}
+	printk("\n");
+	printk("Number of logical nodes in system = %d\n", num_online_nodes());
+	printk("Number of memory chunks in system = %d\n", num_memory_chunks);
+
+	for (j = 0; j < num_memory_chunks; j++){
+		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
+		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
+		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
+		node_read_chunk(chunk->nid, chunk);
+	}
+ 
+	for_each_online_node(nid) {
+		unsigned long start = node_start_pfn[nid];
+		unsigned long end = node_end_pfn[nid];
+
+		memory_present(nid, start, end);
+		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
+	}
+	return 1;
+out_fail:
+	return 0;
+}
+
+int __init get_memcfg_from_srat(void)
+{
+	struct acpi_table_header *header = NULL;
+	struct acpi_table_rsdp *rsdp = NULL;
+	struct acpi_table_rsdt *rsdt = NULL;
+	struct acpi_pointer *rsdp_address = NULL;
+	struct acpi_table_rsdt saved_rsdt;
+	int tables = 0;
+	int i = 0;
+
+	acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, rsdp_address);
+
+	if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) {
+		printk("%s: assigning address to rsdp\n", __FUNCTION__);
+		rsdp = (struct acpi_table_rsdp *)
+				(u32)rsdp_address->pointer.physical;
+	} else {
+		printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__);
+		goto out_err;
+	}
+	if (!rsdp) {
+		printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
+		goto out_err;
+	}
+
+	printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
+		rsdp->oem_id);
+
+	if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
+		printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
+		goto out_err;
+	}
+
+	rsdt = (struct acpi_table_rsdt *)
+	    boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
+
+	if (!rsdt) {
+		printk(KERN_WARNING
+		       "%s: ACPI: Invalid root system description tables (RSDT)\n",
+		       __FUNCTION__);
+		goto out_err;
+	}
+
+	header = & rsdt->header;
+
+	if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
+		printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+		goto out_err;
+	}
+
+	/* 
+	 * The number of tables is computed by taking the 
+	 * size of all entries (header size minus total 
+	 * size of RSDT) divided by the size of each entry
+	 * (4-byte table pointers).
+	 */
+	tables = (header->length - sizeof(struct acpi_table_header)) / 4;
+
+	if (!tables)
+		goto out_err;
+
+	memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
+
+	if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
+		printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
+		       saved_rsdt.header.length);
+		goto out_err;
+	}
+
+	printk("Begin SRAT table scan....\n");
+
+	for (i = 0; i < tables; i++) {
+		/* Map in header, then map in full table length. */
+		header = (struct acpi_table_header *)
+			boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header));
+		if (!header)
+			break;
+		header = (struct acpi_table_header *)
+			boot_ioremap(saved_rsdt.entry[i], header->length);
+		if (!header)
+			break;
+
+		if (strncmp((char *) &header->signature, "SRAT", 4))
+			continue;
+
+		/* we've found the srat table. don't need to look at any more tables */
+		return acpi20_parse_srat((struct acpi_table_srat *)header);
+	}
+out_err:
+	printk("failed to get NUMA memory information from SRAT table\n");
+	return 0;
+}
+
+/* For each node run the memory list to determine whether there are
+ * any memory holes.  For each hole determine which ZONE they fall
+ * into.
+ *
+ * NOTE#1: this requires knowledge of the zone boundries and so
+ * _cannot_ be performed before those are calculated in setup_memory.
+ * 
+ * NOTE#2: we rely on the fact that the memory chunks are ordered by
+ * start pfn number during setup.
+ */
+static void __init get_zholes_init(void)
+{
+	int nid;
+	int c;
+	int first;
+	unsigned long end = 0;
+
+	for_each_online_node(nid) {
+		first = 1;
+		for (c = 0; c < num_memory_chunks; c++){
+			if (node_memory_chunk[c].nid == nid) {
+				if (first) {
+					end = node_memory_chunk[c].end_pfn;
+					first = 0;
+
+				} else {
+					/* Record any gap between this chunk
+					 * and the previous chunk on this node
+					 * against the zones it spans.
+					 */
+					chunk_to_zones(end,
+						node_memory_chunk[c].start_pfn,
+						&zholes_size[nid * MAX_NR_ZONES]);
+				}
+			}
+		}
+	}
+}
+
+unsigned long * __init get_zholes_size(int nid)
+{
+	if (!zholes_size_init) {
+		zholes_size_init++;
+		get_zholes_init();
+	}
+	if (nid >= MAX_NUMNODES || !node_online(nid))
+		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
+		       __FUNCTION__, nid, num_online_nodes());
+	return &zholes_size[nid * MAX_NR_ZONES];
+}
diff --git a/arch/i386/kernel/summit.c b/arch/i386/kernel/summit.c
new file mode 100644
index 00000000000..d0e01a3acf3
--- /dev/null
+++ b/arch/i386/kernel/summit.c
@@ -0,0 +1,180 @@
+/*
+ * arch/i386/kernel/summit.c - IBM Summit-Specific Code
+ *
+ * Written By: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (c) 2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/mach-summit/mach_mpparse.h>
+
+static struct rio_table_hdr *rio_table_hdr __initdata;
+static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail    *rio_devs[MAX_NUMNODES*4] __initdata;
+
+static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
+{
+	int twister = 0, node = 0;
+	int i, bus, num_buses;
+
+	for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
+		if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){
+			twister = rio_devs[i]->owner_id;
+			break;
+		}
+	}
+	if (i == rio_table_hdr->num_rio_dev){
+		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__);
+		return last_bus;
+	}
+
+	for(i = 0; i < rio_table_hdr->num_scal_dev; i++){
+		if (scal_devs[i]->node_id == twister){
+			node = scal_devs[i]->node_id;
+			break;
+		}
+	}
+	if (i == rio_table_hdr->num_scal_dev){
+		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__);
+		return last_bus;
+	}
+
+	switch (rio_devs[wpeg_num]->type){
+	case CompatWPEG:
+		/* The Compatability Winnipeg controls the 2 legacy buses,
+		 * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
+		 * a PCI-PCI bridge card is used in either slot: total 5 buses.
+		 */
+		num_buses = 5;
+		break;
+	case AltWPEG:
+		/* The Alternate Winnipeg controls the 2 133MHz buses [1 slot
+		 * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
+		 * the "extra" buses for each of those slots: total 7 buses.
+		 */
+		num_buses = 7;
+		break;
+	case LookOutAWPEG:
+	case LookOutBWPEG:
+		/* A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
+		 * & the "extra" buses for each of those slots: total 9 buses.
+		 */
+		num_buses = 9;
+		break;
+	default:
+		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__);
+		return last_bus;
+	}
+
+	for(bus = last_bus; bus < last_bus + num_buses; bus++)
+		mp_bus_id_to_node[bus] = node;
+	return bus;
+}
+
+static int __init build_detail_arrays(void)
+{
+	unsigned long ptr;
+	int i, scal_detail_size, rio_detail_size;
+
+	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+		printk(KERN_WARNING "%s: MAX_NUMNODES too low!  Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		return 0;
+	}
+
+	switch (rio_table_hdr->version){
+	default:
+		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version);
+		return 0;
+	case 2:
+		scal_detail_size = 11;
+		rio_detail_size = 13;
+		break;
+	case 3:
+		scal_detail_size = 12;
+		rio_detail_size = 15;
+		break;
+	}
+
+	ptr = (unsigned long)rio_table_hdr + 3;
+	for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
+		scal_devs[i] = (struct scal_detail *)ptr;
+
+	for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
+		rio_devs[i] = (struct rio_detail *)ptr;
+
+	return 1;
+}
+
+void __init setup_summit(void)
+{
+	unsigned long		ptr;
+	unsigned short		offset;
+	int			i, next_wpeg, next_bus = 0;
+
+	/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
+	ptr = *(unsigned short *)phys_to_virt(0x40Eul);
+	ptr = (unsigned long)phys_to_virt(ptr << 4);
+
+	rio_table_hdr = NULL;
+	offset = 0x180;
+	while (offset){
+		/* The block id is stored in the 2nd word */
+		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
+			/* set the pointer past the offset & block id */
+			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+			break;
+		}
+		/* The next offset is stored in the 1st word.  0 means no more */
+		offset = *((unsigned short *)(ptr + offset));
+	}
+	if (!rio_table_hdr){
+		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__);
+		return;
+	}
+
+	if (!build_detail_arrays())
+		return;
+
+	/* The first Winnipeg we're looking for has an index of 0 */
+	next_wpeg = 0;
+	do {
+		for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
+			if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){
+				/* It's the Winnipeg we're looking for! */
+				next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
+				next_wpeg++;
+				break;
+			}
+		}
+		/*
+		 * If we go through all Rio devices and don't find one with
+		 * the next index, it means we've found all the Winnipegs,
+		 * and thus all the PCI buses.
+		 */
+		if (i == rio_table_hdr->num_rio_dev)
+			next_wpeg = 0;
+	} while (next_wpeg != 0);
+}
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
new file mode 100644
index 00000000000..a4a61976ecb
--- /dev/null
+++ b/arch/i386/kernel/sys_i386.c
@@ -0,0 +1,252 @@
+/*
+ * linux/arch/i386/kernel/sys_i386.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/i386
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+asmlinkage int sys_pipe(unsigned long __user * fildes)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe(fd);
+	if (!error) {
+		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+			error = -EFAULT;
+	}
+	return error;
+}
+
+/* common code for old and new mmaps */
+static inline long do_mmap2(
+	unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	int error = -EBADF;
+	struct file * file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return error;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	return do_mmap2(addr, len, prot, flags, fd, pgoff);
+}
+
+/*
+ * Perform the select(nd, in, out, ex, tv) and mmap() system
+ * calls. Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
+{
+	struct mmap_arg_struct a;
+	int err = -EFAULT;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		goto out;
+
+	err = -EINVAL;
+	if (a.offset & ~PAGE_MASK)
+		goto out;
+
+	err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+out:
+	return err;
+}
+
+
+struct sel_arg_struct {
+	unsigned long n;
+	fd_set __user *inp, *outp, *exp;
+	struct timeval __user *tvp;
+};
+
+asmlinkage int old_select(struct sel_arg_struct __user *arg)
+{
+	struct sel_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	/* sys_select() does the appropriate kernel locking */
+	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+asmlinkage int sys_ipc (uint call, int first, int second,
+			int third, void __user *ptr, long fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	case SEMOP:
+		return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
+	case SEMTIMEDOP:
+		return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
+					(const struct timespec __user *)fifth);
+
+	case SEMGET:
+		return sys_semget (first, second, third);
+	case SEMCTL: {
+		union semun fourth;
+		if (!ptr)
+			return -EINVAL;
+		if (get_user(fourth.__pad, (void __user * __user *) ptr))
+			return -EFAULT;
+		return sys_semctl (first, second, third, fourth);
+	}
+
+	case MSGSND:
+		return sys_msgsnd (first, (struct msgbuf __user *) ptr, 
+				   second, third);
+	case MSGRCV:
+		switch (version) {
+		case 0: {
+			struct ipc_kludge tmp;
+			if (!ptr)
+				return -EINVAL;
+			
+			if (copy_from_user(&tmp,
+					   (struct ipc_kludge __user *) ptr, 
+					   sizeof (tmp)))
+				return -EFAULT;
+			return sys_msgrcv (first, tmp.msgp, second,
+					   tmp.msgtyp, third);
+		}
+		default:
+			return sys_msgrcv (first,
+					   (struct msgbuf __user *) ptr,
+					   second, fifth, third);
+		}
+	case MSGGET:
+		return sys_msgget ((key_t) first, second);
+	case MSGCTL:
+		return sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
+
+	case SHMAT:
+		switch (version) {
+		default: {
+			ulong raddr;
+			ret = do_shmat (first, (char __user *) ptr, second, &raddr);
+			if (ret)
+				return ret;
+			return put_user (raddr, (ulong __user *) third);
+		}
+		case 1:	/* iBCS2 emulator entry point */
+			if (!segment_eq(get_fs(), get_ds()))
+				return -EINVAL;
+			/* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
+			return do_shmat (first, (char __user *) ptr, second, (ulong *) third);
+		}
+	case SHMDT: 
+		return sys_shmdt ((char __user *)ptr);
+	case SHMGET:
+		return sys_shmget (first, second, third);
+	case SHMCTL:
+		return sys_shmctl (first, second,
+				   (struct shmid_ds __user *) ptr);
+	default:
+		return -ENOSYS;
+	}
+}
+
+/*
+ * Old cruft
+ */
+asmlinkage int sys_uname(struct old_utsname __user * name)
+{
+	int err;
+	if (!name)
+		return -EFAULT;
+	down_read(&uts_sem);
+	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	up_read(&uts_sem);
+	return err?-EFAULT:0;
+}
+
+asmlinkage int sys_olduname(struct oldold_utsname __user * name)
+{
+	int error;
+
+	if (!name)
+		return -EFAULT;
+	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+		return -EFAULT;
+  
+  	down_read(&uts_sem);
+	
+	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	error |= __put_user(0,name->release+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	error |= __put_user(0,name->version+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+	
+	up_read(&uts_sem);
+	
+	error = error ? -EFAULT : 0;
+
+	return error;
+}
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
new file mode 100644
index 00000000000..960d8bd137d
--- /dev/null
+++ b/arch/i386/kernel/sysenter.c
@@ -0,0 +1,65 @@
+/*
+ * linux/arch/i386/kernel/sysenter.c
+ *
+ * (C) Copyright 2002 Linus Torvalds
+ *
+ * This file contains the needed initializations to support sysenter.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/elf.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+#include <asm/pgtable.h>
+#include <asm/unistd.h>
+
+extern asmlinkage void sysenter_entry(void);
+
+void enable_sep_cpu(void *info)
+{
+	int cpu = get_cpu();
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+	tss->ss1 = __KERNEL_CS;
+	tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+	wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
+	put_cpu();	
+}
+
+/*
+ * These symbols are defined by vsyscall.o to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vsyscall_int80_start, vsyscall_int80_end;
+extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+
+static int __init sysenter_setup(void)
+{
+	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
+
+	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+
+	if (!boot_cpu_has(X86_FEATURE_SEP)) {
+		memcpy(page,
+		       &vsyscall_int80_start,
+		       &vsyscall_int80_end - &vsyscall_int80_start);
+		return 0;
+	}
+
+	memcpy(page,
+	       &vsyscall_sysenter_start,
+	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
+
+	on_each_cpu(enable_sep_cpu, NULL, 1, 1);
+	return 0;
+}
+
+__initcall(sysenter_setup);
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
new file mode 100644
index 00000000000..9b55e30e449
--- /dev/null
+++ b/arch/i386/kernel/time.c
@@ -0,0 +1,476 @@
+/*
+ *  linux/arch/i386/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the PC-specific time handling details:
+ * reading the RTC at bootup, etc..
+ * 1994-07-02    Alan Modra
+ *	fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ * 1995-03-26    Markus Kuhn
+ *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
+ *      precision CMOS clock update
+ * 1996-05-03    Ingo Molnar
+ *      fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
+ *		"A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-09-05    (Various)
+ *	More robust do_fast_gettimeoffset() algorithm implemented
+ *	(works with APM, Cyrix 6x86MX and Centaur C6),
+ *	monotonic gettimeofday() with fast_get_timeoffset(),
+ *	drift-proof precision TSC calibration on boot
+ *	(C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
+ *	Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
+ *	ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
+ * 1998-12-16    Andrea Arcangeli
+ *	Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ *	because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
+ *	Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *	serialize accesses to xtime/lost_ticks).
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/bcd.h>
+#include <linux/efi.h>
+#include <linux/mca.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/timer.h>
+
+#include "mach_time.h"
+
+#include <linux/timex.h>
+#include <linux/config.h>
+
+#include <asm/hpet.h>
+
+#include <asm/arch_hooks.h>
+
+#include "io_ports.h"
+
+extern spinlock_t i8259A_lock;
+int pit_latch_buggy;              /* extern */
+
+#include "do_timer.h"
+
+u64 jiffies_64 = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);
+
+unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
+
+extern unsigned long wall_jiffies;
+
+DEFINE_SPINLOCK(rtc_lock);
+
+DEFINE_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+struct timer_opts *cur_timer = &timer_none;
+
+/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+	unsigned char val;
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	val = inb_p(RTC_PORT(1));
+	lock_cmos_suffix(addr);
+	return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	outb_p(val, RTC_PORT(1));
+	lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+/*
+ * This version of gettimeofday has microsecond resolution
+ * and better than microsecond precision on fast x86 machines with TSC.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+	unsigned long seq;
+	unsigned long usec, sec;
+	unsigned long max_ntp_tick;
+
+	do {
+		unsigned long lost;
+
+		seq = read_seqbegin(&xtime_lock);
+
+		usec = cur_timer->get_offset();
+		lost = jiffies - wall_jiffies;
+
+		/*
+		 * If time_adjust is negative then NTP is slowing the clock
+		 * so make sure not to go into next possible interval.
+		 * Better to lose some accuracy than have time go backwards..
+		 */
+		if (unlikely(time_adjust < 0)) {
+			max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
+			usec = min(usec, max_ntp_tick);
+
+			if (lost)
+				usec += lost * max_ntp_tick;
+		}
+		else if (unlikely(lost))
+			usec += lost * (USEC_PER_SEC / HZ);
+
+		sec = xtime.tv_sec;
+		usec += (xtime.tv_nsec / 1000);
+	} while (read_seqretry(&xtime_lock, seq));
+
+	while (usec >= 1000000) {
+		usec -= 1000000;
+		sec++;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
+int do_settimeofday(struct timespec *tv)
+{
+	time_t wtm_sec, sec = tv->tv_sec;
+	long wtm_nsec, nsec = tv->tv_nsec;
+
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irq(&xtime_lock);
+	/*
+	 * This is revolting. We need to set "xtime" correctly. However, the
+	 * value in this location is the value at the most recent update of
+	 * wall time.  Discover what correction gettimeofday() would have
+	 * made, and then undo it!
+	 */
+	nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
+	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
+
+	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+	set_normalized_timespec(&xtime, sec, nsec);
+	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+	time_adjust = 0;		/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+	write_sequnlock_irq(&xtime_lock);
+	clock_was_set();
+	return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+static int set_rtc_mmss(unsigned long nowtime)
+{
+	int retval;
+
+	WARN_ON(irqs_disabled());
+
+	/* gets recalled with irq locally disabled */
+	spin_lock_irq(&rtc_lock);
+	if (efi_enabled)
+		retval = efi_set_rtc_mmss(nowtime);
+	else
+		retval = mach_set_rtc_mmss(nowtime);
+	spin_unlock_irq(&rtc_lock);
+
+	return retval;
+}
+
+
+int timer_ack;
+
+/* monotonic_clock(): returns # of nanoseconds passed since time_init()
+ *		Note: This function is required to return accurate
+ *		time even in the absence of multiple timer ticks.
+ */
+unsigned long long monotonic_clock(void)
+{
+	return cur_timer->monotonic_clock();
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	if (in_lock_functions(pc))
+		return *(unsigned long *)(regs->ebp + 4);
+
+	return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+static inline void do_timer_interrupt(int irq, void *dev_id,
+					struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_IO_APIC
+	if (timer_ack) {
+		/*
+		 * Subtle, when I/O APICs are used we have to ack timer IRQ
+		 * manually to reset the IRR bit for do_slow_gettimeoffset().
+		 * This will also deassert NMI lines for the watchdog if run
+		 * on an 82489DX-based system.
+		 */
+		spin_lock(&i8259A_lock);
+		outb(0x0c, PIC_MASTER_OCW3);
+		/* Ack the IRQ; AEOI will end it automatically. */
+		inb(PIC_MASTER_POLL);
+		spin_unlock(&i8259A_lock);
+	}
+#endif
+
+	do_timer_interrupt_hook(regs);
+
+
+	if (MCA_bus) {
+		/* The PS/2 uses level-triggered interrupts.  You can't
+		turn them off, nor would you want to (any attempt to
+		enable edge-triggered interrupts usually gets intercepted by a
+		special hardware circuit).  Hence we have to acknowledge
+		the timer interrupt.  Through some incredibly stupid
+		design idea, the reset for IRQ 0 is done by setting the
+		high bit of the PPI port B (0x61).  Note that some PS/2s,
+		notably the 55SX, work fine if this is removed.  */
+
+		irq = inb_p( 0x61 );	/* read the current state */
+		outb_p( irq|0x80, 0x61 );	/* reset the IRQ */
+	}
+}
+
+/*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	/*
+	 * Here we are in the timer irq handler. We just have irqs locally
+	 * disabled but we don't know if the timer_bh is running on the other
+	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+	 * the irq version of write_lock because as just said we have irq
+	 * locally disabled. -arca
+	 */
+	write_seqlock(&xtime_lock);
+
+	cur_timer->mark_offset();
+ 
+	do_timer_interrupt(irq, NULL, regs);
+
+	write_sequnlock(&xtime_lock);
+	return IRQ_HANDLED;
+}
+
+/* not static: needed by APM */
+unsigned long get_cmos_time(void)
+{
+	unsigned long retval;
+
+	spin_lock(&rtc_lock);
+
+	if (efi_enabled)
+		retval = efi_get_time();
+	else
+		retval = mach_get_cmos_time();
+
+	spin_unlock(&rtc_lock);
+
+	return retval;
+}
+static void sync_cmos_clock(unsigned long dummy);
+
+static struct timer_list sync_cmos_timer =
+                                      TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
+{
+	struct timeval now, next;
+	int fail = 1;
+
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 * This code is run on a timer.  If the clock is set, that timer
+	 * may not expire at the correct time.  Thus, we adjust...
+	 */
+	if ((time_status & STA_UNSYNC) != 0)
+		/*
+		 * Not synced, exit, do not restart a timer (if one is
+		 * running, let it run out).
+		 */
+		return;
+
+	do_gettimeofday(&now);
+	if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+	    now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
+		fail = set_rtc_mmss(now.tv_sec);
+
+	next.tv_usec = USEC_AFTER - now.tv_usec;
+	if (next.tv_usec <= 0)
+		next.tv_usec += USEC_PER_SEC;
+
+	if (!fail)
+		next.tv_sec = 659;
+	else
+		next.tv_sec = 0;
+
+	if (next.tv_usec >= USEC_PER_SEC) {
+		next.tv_sec++;
+		next.tv_usec -= USEC_PER_SEC;
+	}
+	mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
+}
+
+void notify_arch_cmos_timer(void)
+{
+	mod_timer(&sync_cmos_timer, jiffies + 1);
+}
+
+static long clock_cmos_diff, sleep_start;
+
+static int timer_suspend(struct sys_device *dev, u32 state)
+{
+	/*
+	 * Estimate time zone so that set_time can update the clock
+	 */
+	clock_cmos_diff = -get_cmos_time();
+	clock_cmos_diff += get_seconds();
+	sleep_start = get_cmos_time();
+	return 0;
+}
+
+static int timer_resume(struct sys_device *dev)
+{
+	unsigned long flags;
+	unsigned long sec;
+	unsigned long sleep_length;
+
+#ifdef CONFIG_HPET_TIMER
+	if (is_hpet_enabled())
+		hpet_reenable();
+#endif
+	sec = get_cmos_time() + clock_cmos_diff;
+	sleep_length = (get_cmos_time() - sleep_start) * HZ;
+	write_seqlock_irqsave(&xtime_lock, flags);
+	xtime.tv_sec = sec;
+	xtime.tv_nsec = 0;
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	jiffies += sleep_length;
+	wall_jiffies += sleep_length;
+	return 0;
+}
+
+static struct sysdev_class timer_sysclass = {
+	.resume = timer_resume,
+	.suspend = timer_suspend,
+	set_kset_name("timer"),
+};
+
+
+/* XXX this driverfs stuff should probably go elsewhere later -john */
+static struct sys_device device_timer = {
+	.id	= 0,
+	.cls	= &timer_sysclass,
+};
+
+static int time_init_device(void)
+{
+	int error = sysdev_class_register(&timer_sysclass);
+	if (!error)
+		error = sysdev_register(&device_timer);
+	return error;
+}
+
+device_initcall(time_init_device);
+
+#ifdef CONFIG_HPET_TIMER
+extern void (*late_time_init)(void);
+/* Duplicate of time_init() below, with hpet_enable part added */
+static void __init hpet_time_init(void)
+{
+	xtime.tv_sec = get_cmos_time();
+	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
+	if (hpet_enable() >= 0) {
+		printk("Using HPET for base-timer\n");
+	}
+
+	cur_timer = select_timer();
+	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+
+	time_init_hook();
+}
+#endif
+
+void __init time_init(void)
+{
+#ifdef CONFIG_HPET_TIMER
+	if (is_hpet_capable()) {
+		/*
+		 * HPET initialization needs to do memory-mapped io. So, let
+		 * us do a late initialization after mem_init().
+		 */
+		late_time_init = hpet_time_init;
+		return;
+	}
+#endif
+	xtime.tv_sec = get_cmos_time();
+	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
+	cur_timer = select_timer();
+	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+
+	time_init_hook();
+}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
new file mode 100644
index 00000000000..244a31b04be
--- /dev/null
+++ b/arch/i386/kernel/time_hpet.c
@@ -0,0 +1,458 @@
+/*
+ *  linux/arch/i386/kernel/time_hpet.c
+ *  This code largely copied from arch/x86_64/kernel/time.c
+ *  See that file for credits.
+ *
+ *  2003-06-30    Venkatesh Pallipadi - Additional changes for HPET support
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/timer.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+
+#include <linux/timex.h>
+#include <linux/config.h>
+
+#include <asm/hpet.h>
+#include <linux/hpet.h>
+
+static unsigned long hpet_period;	/* fsecs / HPET clock */
+unsigned long hpet_tick;		/* hpet clks count per tick */
+unsigned long hpet_address;		/* hpet memory map physical address */
+
+static int use_hpet; 		/* can be used for runtime check of hpet */
+static int boot_hpet_disable; 	/* boottime override for HPET timer */
+static void __iomem * hpet_virt_address;	/* hpet kernel virtual address */
+
+#define FSEC_TO_USEC (1000000000UL)
+
+int hpet_readl(unsigned long a)
+{
+	return readl(hpet_virt_address + a);
+}
+
+static void hpet_writel(unsigned long d, unsigned long a)
+{
+	writel(d, hpet_virt_address + a);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * HPET counters dont wrap around on every tick. They just change the
+ * comparator value and continue. Next tick can be caught by checking
+ * for a change in the comparator value. Used in apic.c.
+ */
+static void __init wait_hpet_tick(void)
+{
+	unsigned int start_cmp_val, end_cmp_val;
+
+	start_cmp_val = hpet_readl(HPET_T0_CMP);
+	do {
+		end_cmp_val = hpet_readl(HPET_T0_CMP);
+	} while (start_cmp_val == end_cmp_val);
+}
+#endif
+
+static int hpet_timer_stop_set_go(unsigned long tick)
+{
+	unsigned int cfg;
+
+	/*
+	 * Stop the timers and reset the main counter.
+	 */
+	cfg = hpet_readl(HPET_CFG);
+	cfg &= ~HPET_CFG_ENABLE;
+	hpet_writel(cfg, HPET_CFG);
+	hpet_writel(0, HPET_COUNTER);
+	hpet_writel(0, HPET_COUNTER + 4);
+
+	/*
+	 * Set up timer 0, as periodic with first interrupt to happen at
+	 * hpet_tick, and period also hpet_tick.
+	 */
+	cfg = hpet_readl(HPET_T0_CFG);
+	cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
+	       HPET_TN_SETVAL | HPET_TN_32BIT;
+	hpet_writel(cfg, HPET_T0_CFG);
+
+	/*
+	 * The first write after writing TN_SETVAL to the config register sets
+	 * the counter value, the second write sets the threshold.
+	 */
+	hpet_writel(tick, HPET_T0_CMP);
+	hpet_writel(tick, HPET_T0_CMP);
+
+	/*
+ 	 * Go!
+ 	 */
+	cfg = hpet_readl(HPET_CFG);
+	cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+	hpet_writel(cfg, HPET_CFG);
+
+	return 0;
+}
+
+/*
+ * Check whether HPET was found by ACPI boot parse. If yes setup HPET
+ * counter 0 for kernel base timer.
+ */
+int __init hpet_enable(void)
+{
+	unsigned int id;
+	unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */
+	unsigned long hpet_tick_rem;
+
+	if (boot_hpet_disable)
+		return -1;
+
+	if (!hpet_address) {
+		return -1;
+	}
+	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+	/*
+	 * Read the period, compute tick and quotient.
+	 */
+	id = hpet_readl(HPET_ID);
+
+	/*
+	 * We are checking for value '1' or more in number field if
+	 * CONFIG_HPET_EMULATE_RTC is set because we will need an
+	 * additional timer for RTC emulation.
+	 * However, we can do with one timer otherwise using the
+	 * the single HPET timer for system time.
+	 */
+	if (
+#ifdef CONFIG_HPET_EMULATE_RTC
+		!(id & HPET_ID_NUMBER) ||
+#endif
+	    !(id & HPET_ID_LEGSUP))
+		return -1;
+
+	hpet_period = hpet_readl(HPET_PERIOD);
+	if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD))
+		return -1;
+
+	/*
+	 * 64 bit math
+	 * First changing tick into fsec
+	 * Then 64 bit div to find number of hpet clk per tick
+	 */
+	ASM_MUL64_REG(tick_fsec_low, tick_fsec_high,
+			KERNEL_TICK_USEC, FSEC_TO_USEC);
+	ASM_DIV64_REG(hpet_tick, hpet_tick_rem,
+			hpet_period, tick_fsec_low, tick_fsec_high);
+
+	if (hpet_tick_rem > (hpet_period >> 1))
+		hpet_tick++; /* rounding the result */
+
+	if (hpet_timer_stop_set_go(hpet_tick))
+		return -1;
+
+	use_hpet = 1;
+
+#ifdef	CONFIG_HPET
+	{
+		struct hpet_data	hd;
+		unsigned int 		ntimer;
+
+		memset(&hd, 0, sizeof (hd));
+
+		ntimer = hpet_readl(HPET_ID);
+		ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+		ntimer++;
+
+		/*
+		 * Register with driver.
+		 * Timer0 and Timer1 is used by platform.
+		 */
+		hd.hd_phys_address = hpet_address;
+		hd.hd_address = hpet_virt_address;
+		hd.hd_nirqs = ntimer;
+		hd.hd_flags = HPET_DATA_PLATFORM;
+		hpet_reserve_timer(&hd, 0);
+#ifdef	CONFIG_HPET_EMULATE_RTC
+		hpet_reserve_timer(&hd, 1);
+#endif
+		hd.hd_irq[0] = HPET_LEGACY_8254;
+		hd.hd_irq[1] = HPET_LEGACY_RTC;
+		if (ntimer > 2) {
+			struct hpet __iomem	*hpet;
+			struct hpet_timer __iomem *timer;
+			int			i;
+
+			hpet = hpet_virt_address;
+
+			for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
+				timer++, i++)
+				hd.hd_irq[i] = (timer->hpet_config &
+					Tn_INT_ROUTE_CNF_MASK) >>
+					Tn_INT_ROUTE_CNF_SHIFT;
+
+		}
+
+		hpet_alloc(&hd);
+	}
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	wait_timer_tick = wait_hpet_tick;
+#endif
+	return 0;
+}
+
+int hpet_reenable(void)
+{
+	return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int is_hpet_enabled(void)
+{
+	return use_hpet;
+}
+
+int is_hpet_capable(void)
+{
+	if (!boot_hpet_disable && hpet_address)
+		return 1;
+	return 0;
+}
+
+static int __init hpet_setup(char* str)
+{
+	if (str) {
+		if (!strncmp("disable", str, 7))
+			boot_hpet_disable = 1;
+	}
+	return 1;
+}
+
+__setup("hpet=", hpet_setup);
+
+#ifdef CONFIG_HPET_EMULATE_RTC
+/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+ * is enabled, we support RTC interrupt functionality in software.
+ * RTC has 3 kinds of interrupts:
+ * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
+ *    is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ *    2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
+ * (1) and (2) above are implemented using polling at a frequency of
+ * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
+ * overhead. (DEFAULT_RTC_INT_FREQ)
+ * For (3), we use interrupts at 64Hz or user specified periodic
+ * frequency, whichever is higher.
+ */
+#include <linux/mc146818rtc.h>
+#include <linux/rtc.h>
+
+extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+#define DEFAULT_RTC_INT_FREQ 	64
+#define RTC_NUM_INTS 		1
+
+static unsigned long UIE_on;
+static unsigned long prev_update_sec;
+
+static unsigned long AIE_on;
+static struct rtc_time alarm_time;
+
+static unsigned long PIE_on;
+static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
+static unsigned long PIE_count;
+
+static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
+
+/*
+ * Timer 1 for RTC, we do not use periodic interrupt feature,
+ * even if HPET supports periodic interrupts on Timer 1.
+ * The reason being, to set up a periodic interrupt in HPET, we need to
+ * stop the main counter. And if we do that everytime someone diables/enables
+ * RTC, we will have adverse effect on main kernel timer running on Timer 0.
+ * So, for the time being, simulate the periodic interrupt in software.
+ *
+ * hpet_rtc_timer_init() is called for the first time and during subsequent
+ * interuppts reinit happens through hpet_rtc_timer_reinit().
+ */
+int hpet_rtc_timer_init(void)
+{
+	unsigned int cfg, cnt;
+	unsigned long flags;
+
+	if (!is_hpet_enabled())
+		return 0;
+	/*
+	 * Set the counter 1 and enable the interrupts.
+	 */
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	local_irq_save(flags);
+	cnt = hpet_readl(HPET_COUNTER);
+	cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
+	hpet_writel(cnt, HPET_T1_CMP);
+	local_irq_restore(flags);
+
+	cfg = hpet_readl(HPET_T1_CFG);
+	cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+	hpet_writel(cfg, HPET_T1_CFG);
+
+	return 1;
+}
+
+static void hpet_rtc_timer_reinit(void)
+{
+	unsigned int cfg, cnt;
+
+	if (!(PIE_on | AIE_on | UIE_on))
+		return;
+
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	/* It is more accurate to use the comparator value than current count.*/
+	cnt = hpet_readl(HPET_T1_CMP);
+	cnt += hpet_tick*HZ/hpet_rtc_int_freq;
+	hpet_writel(cnt, HPET_T1_CMP);
+
+	cfg = hpet_readl(HPET_T1_CFG);
+	cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+	hpet_writel(cfg, HPET_T1_CFG);
+
+	return;
+}
+
+/*
+ * The functions below are called from rtc driver.
+ * Return 0 if HPET is not being used.
+ * Otherwise do the necessary changes and return 1.
+ */
+int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (bit_mask & RTC_UIE)
+		UIE_on = 0;
+	if (bit_mask & RTC_PIE)
+		PIE_on = 0;
+	if (bit_mask & RTC_AIE)
+		AIE_on = 0;
+
+	return 1;
+}
+
+int hpet_set_rtc_irq_bit(unsigned long bit_mask)
+{
+	int timer_init_reqd = 0;
+
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (!(PIE_on | AIE_on | UIE_on))
+		timer_init_reqd = 1;
+
+	if (bit_mask & RTC_UIE) {
+		UIE_on = 1;
+	}
+	if (bit_mask & RTC_PIE) {
+		PIE_on = 1;
+		PIE_count = 0;
+	}
+	if (bit_mask & RTC_AIE) {
+		AIE_on = 1;
+	}
+
+	if (timer_init_reqd)
+		hpet_rtc_timer_init();
+
+	return 1;
+}
+
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	alarm_time.tm_hour = hrs;
+	alarm_time.tm_min = min;
+	alarm_time.tm_sec = sec;
+
+	return 1;
+}
+
+int hpet_set_periodic_freq(unsigned long freq)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	PIE_freq = freq;
+	PIE_count = 0;
+
+	return 1;
+}
+
+int hpet_rtc_dropped_irq(void)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	return 1;
+}
+
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct rtc_time curr_time;
+	unsigned long rtc_int_flag = 0;
+	int call_rtc_interrupt = 0;
+
+	hpet_rtc_timer_reinit();
+
+	if (UIE_on | AIE_on) {
+		rtc_get_rtc_time(&curr_time);
+	}
+	if (UIE_on) {
+		if (curr_time.tm_sec != prev_update_sec) {
+			/* Set update int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag = RTC_UF;
+			prev_update_sec = curr_time.tm_sec;
+		}
+	}
+	if (PIE_on) {
+		PIE_count++;
+		if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
+			/* Set periodic int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_PF;
+			PIE_count = 0;
+		}
+	}
+	if (AIE_on) {
+		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
+		    (curr_time.tm_min == alarm_time.tm_min) &&
+		    (curr_time.tm_hour == alarm_time.tm_hour)) {
+			/* Set alarm int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_AF;
+		}
+	}
+	if (call_rtc_interrupt) {
+		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+		rtc_interrupt(rtc_int_flag, dev_id, regs);
+	}
+	return IRQ_HANDLED;
+}
+#endif
+
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
new file mode 100644
index 00000000000..8fa12be658d
--- /dev/null
+++ b/arch/i386/kernel/timers/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for x86 timers
+#
+
+obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
+
+obj-$(CONFIG_X86_CYCLONE_TIMER)	+= timer_cyclone.o
+obj-$(CONFIG_HPET_TIMER)	+= timer_hpet.o
+obj-$(CONFIG_X86_PM_TIMER)	+= timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
new file mode 100644
index 00000000000..f7f90005e22
--- /dev/null
+++ b/arch/i386/kernel/timers/common.c
@@ -0,0 +1,160 @@
+/*
+ *	Common functions used across the timers go here
+ */
+
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+#include <asm/timer.h>
+#include <asm/hpet.h>
+
+#include "mach_timer.h"
+
+/* ------ Calibrate the TSC -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
+ * Too much 64-bit arithmetic here to do this cleanly in C, and for
+ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
+ * output busy loop as low as possible. We avoid reading the CTC registers
+ * directly because of the awkward 8-bit access mechanism of the 82C54
+ * device.
+ */
+
+#define CALIBRATE_TIME	(5 * 1000020/HZ)
+
+unsigned long __init calibrate_tsc(void)
+{
+	mach_prepare_counter();
+
+	{
+		unsigned long startlow, starthigh;
+		unsigned long endlow, endhigh;
+		unsigned long count;
+
+		rdtsc(startlow,starthigh);
+		mach_countup(&count);
+		rdtsc(endlow,endhigh);
+
+
+		/* Error: ECTCNEVERSET */
+		if (count <= 1)
+			goto bad_ctc;
+
+		/* 64-bit subtract - gcc just messes up with long longs */
+		__asm__("subl %2,%0\n\t"
+			"sbbl %3,%1"
+			:"=a" (endlow), "=d" (endhigh)
+			:"g" (startlow), "g" (starthigh),
+			 "0" (endlow), "1" (endhigh));
+
+		/* Error: ECPUTOOFAST */
+		if (endhigh)
+			goto bad_ctc;
+
+		/* Error: ECPUTOOSLOW */
+		if (endlow <= CALIBRATE_TIME)
+			goto bad_ctc;
+
+		__asm__("divl %2"
+			:"=a" (endlow), "=d" (endhigh)
+			:"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
+
+		return endlow;
+	}
+
+	/*
+	 * The CTC wasn't reliable: we got a hit on the very first read,
+	 * or the CPU was so fast/slow that the quotient wouldn't fit in
+	 * 32 bits..
+	 */
+bad_ctc:
+	return 0;
+}
+
+#ifdef CONFIG_HPET_TIMER
+/* ------ Calibrate the TSC using HPET -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
+ * Second output is parameter 1 (when non NULL)
+ * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
+ * calibrate_tsc() calibrates the processor TSC by comparing
+ * it to the HPET timer of known frequency.
+ * Too much 64-bit arithmetic here to do this cleanly in C
+ */
+#define CALIBRATE_CNT_HPET 	(5 * hpet_tick)
+#define CALIBRATE_TIME_HPET 	(5 * KERNEL_TICK_USEC)
+
+unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
+{
+	unsigned long tsc_startlow, tsc_starthigh;
+	unsigned long tsc_endlow, tsc_endhigh;
+	unsigned long hpet_start, hpet_end;
+	unsigned long result, remain;
+
+	hpet_start = hpet_readl(HPET_COUNTER);
+	rdtsc(tsc_startlow, tsc_starthigh);
+	do {
+		hpet_end = hpet_readl(HPET_COUNTER);
+	} while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
+	rdtsc(tsc_endlow, tsc_endhigh);
+
+	/* 64-bit subtract - gcc just messes up with long longs */
+	__asm__("subl %2,%0\n\t"
+		"sbbl %3,%1"
+		:"=a" (tsc_endlow), "=d" (tsc_endhigh)
+		:"g" (tsc_startlow), "g" (tsc_starthigh),
+		 "0" (tsc_endlow), "1" (tsc_endhigh));
+
+	/* Error: ECPUTOOFAST */
+	if (tsc_endhigh)
+		goto bad_calibration;
+
+	/* Error: ECPUTOOSLOW */
+	if (tsc_endlow <= CALIBRATE_TIME_HPET)
+		goto bad_calibration;
+
+	ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
+	if (remain > (tsc_endlow >> 1))
+		result++; /* rounding the result */
+
+	if (tsc_hpet_quotient_ptr) {
+		unsigned long tsc_hpet_quotient;
+
+		ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
+			CALIBRATE_CNT_HPET);
+		if (remain > (tsc_endlow >> 1))
+			tsc_hpet_quotient++; /* rounding the result */
+		*tsc_hpet_quotient_ptr = tsc_hpet_quotient;
+	}
+
+	return result;
+bad_calibration:
+	/*
+	 * the CPU was so fast/slow that the quotient wouldn't fit in
+	 * 32 bits..
+	 */
+	return 0;
+}
+#endif
+
+/* calculate cpu_khz */
+void __init init_cpu_khz(void)
+{
+	if (cpu_has_tsc) {
+		unsigned long tsc_quotient = calibrate_tsc();
+		if (tsc_quotient) {
+			/* report CPU clock rate in Hz.
+			 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+			 * clock/second. Our precision is about 100 ppm.
+			 */
+			{	unsigned long eax=0, edx=1000;
+				__asm__("divl %2"
+		       		:"=a" (cpu_khz), "=d" (edx)
+        	       		:"r" (tsc_quotient),
+	                	"0" (eax), "1" (edx));
+				printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
+			}
+		}
+	}
+}
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
new file mode 100644
index 00000000000..a3d6a288088
--- /dev/null
+++ b/arch/i386/kernel/timers/timer.c
@@ -0,0 +1,66 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/timer.h>
+
+#ifdef CONFIG_HPET_TIMER
+/*
+ * HPET memory read is slower than tsc reads, but is more dependable as it
+ * always runs at constant frequency and reduces complexity due to
+ * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
+ * timer_pit when HPET is active. So, we default to timer_tsc.
+ */
+#endif
+/* list of timers, ordered by preference, NULL terminated */
+static struct init_timer_opts* __initdata timers[] = {
+#ifdef CONFIG_X86_CYCLONE_TIMER
+	&timer_cyclone_init,
+#endif
+#ifdef CONFIG_HPET_TIMER
+	&timer_hpet_init,
+#endif
+#ifdef CONFIG_X86_PM_TIMER
+	&timer_pmtmr_init,
+#endif
+	&timer_tsc_init,
+	&timer_pit_init,
+	NULL,
+};
+
+static char clock_override[10] __initdata;
+
+static int __init clock_setup(char* str)
+{
+	if (str)
+		strlcpy(clock_override, str, sizeof(clock_override));
+	return 1;
+}
+__setup("clock=", clock_setup);
+
+
+/* The chosen timesource has been found to be bad.
+ * Fall back to a known good timesource (the PIT)
+ */
+void clock_fallback(void)
+{
+	cur_timer = &timer_pit;
+}
+
+/* iterates through the list of timers, returning the first 
+ * one that initializes successfully.
+ */
+struct timer_opts* __init select_timer(void)
+{
+	int i = 0;
+	
+	/* find most preferred working timer */
+	while (timers[i]) {
+		if (timers[i]->init)
+			if (timers[i]->init(clock_override) == 0)
+				return timers[i]->opts;
+		++i;
+	}
+		
+	panic("select_timer: Cannot find a suitable timer\n");
+	return NULL;
+}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
new file mode 100644
index 00000000000..f6f1206a11b
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -0,0 +1,259 @@
+/*	Cyclone-timer: 
+ *		This code implements timer_ops for the cyclone counter found
+ *		on IBM x440, x360, and other Summit based systems.
+ *
+ *	Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
+ */
+
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/jiffies.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include "io_ports.h"
+
+extern spinlock_t i8253_lock;
+
+/* Number of usecs that the last interrupt was delayed */
+static int delay_at_last_interrupt;
+
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0
+#define CYCLONE_PMCC_OFFSET 0x51A0
+#define CYCLONE_MPMC_OFFSET 0x51D0
+#define CYCLONE_MPCS_OFFSET 0x51A8
+#define CYCLONE_TIMER_FREQ 100000000
+#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
+int use_cyclone = 0;
+
+static u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
+static u32 last_cyclone_low;
+static u32 last_cyclone_high;
+static unsigned long long monotonic_base;
+static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
+
+/* helper macro to atomically read both cyclone counter registers */
+#define read_cyclone_counter(low,high) \
+	do{ \
+		high = cyclone_timer[1]; low = cyclone_timer[0]; \
+	} while (high != cyclone_timer[1]);
+
+
+static void mark_offset_cyclone(void)
+{
+	unsigned long lost, delay;
+	unsigned long delta = last_cyclone_low;
+	int count;
+	unsigned long long this_offset, last_offset;
+
+	write_seqlock(&monotonic_lock);
+	last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+	
+	spin_lock(&i8253_lock);
+	read_cyclone_counter(last_cyclone_low,last_cyclone_high);
+
+	/* read values for delay_at_last_interrupt */
+	outb_p(0x00, 0x43);     /* latch the count ASAP */
+
+	count = inb_p(0x40);    /* read the latched count */
+	count |= inb(0x40) << 8;
+
+	/*
+	 * VIA686a test code... reset the latch if count > max + 1
+	 * from timer_pit.c - cjb
+	 */
+	if (count > LATCH) {
+		outb_p(0x34, PIT_MODE);
+		outb_p(LATCH & 0xff, PIT_CH0);
+		outb(LATCH >> 8, PIT_CH0);
+		count = LATCH - 1;
+	}
+	spin_unlock(&i8253_lock);
+
+	/* lost tick compensation */
+	delta = last_cyclone_low - delta;	
+	delta /= (CYCLONE_TIMER_FREQ/1000000);
+	delta += delay_at_last_interrupt;
+	lost = delta/(1000000/HZ);
+	delay = delta%(1000000/HZ);
+	if (lost >= 2)
+		jiffies_64 += lost-1;
+	
+	/* update the monotonic base value */
+	this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+	monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
+	write_sequnlock(&monotonic_lock);
+
+	/* calculate delay_at_last_interrupt */
+	count = ((LATCH-1) - count) * TICK_SIZE;
+	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+
+	/* catch corner case where tick rollover occured 
+	 * between cyclone and pit reads (as noted when 
+	 * usec delta is > 90% # of usecs/tick)
+	 */
+	if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
+		jiffies_64++;
+}
+
+static unsigned long get_offset_cyclone(void)
+{
+	u32 offset;
+
+	if(!cyclone_timer)
+		return delay_at_last_interrupt;
+
+	/* Read the cyclone timer */
+	offset = cyclone_timer[0];
+
+	/* .. relative to previous jiffy */
+	offset = offset - last_cyclone_low;
+
+	/* convert cyclone ticks to microseconds */	
+	/* XXX slow, can we speed this up? */
+	offset = offset/(CYCLONE_TIMER_FREQ/1000000);
+
+	/* our adjusted time offset in microseconds */
+	return delay_at_last_interrupt + offset;
+}
+
+static unsigned long long monotonic_clock_cyclone(void)
+{
+	u32 now_low, now_high;
+	unsigned long long last_offset, this_offset, base;
+	unsigned long long ret;
+	unsigned seq;
+
+	/* atomically read monotonic base & last_offset */
+	do {
+		seq = read_seqbegin(&monotonic_lock);
+		last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+		base = monotonic_base;
+	} while (read_seqretry(&monotonic_lock, seq));
+
+
+	/* Read the cyclone counter */
+	read_cyclone_counter(now_low,now_high);
+	this_offset = ((unsigned long long)now_high<<32)|now_low;
+
+	/* convert to nanoseconds */
+	ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
+	return ret * (1000000000 / CYCLONE_TIMER_FREQ);
+}
+
+static int __init init_cyclone(char* override)
+{
+	u32* reg;	
+	u32 base;		/* saved cyclone base address */
+	u32 pageaddr;	/* page that contains cyclone_timer register */
+	u32 offset;		/* offset from pageaddr to cyclone_timer register */
+	int i;
+	
+	/* check clock override */
+	if (override[0] && strncmp(override,"cyclone",7))
+			return -ENODEV;
+
+	/*make sure we're on a summit box*/
+	if(!use_cyclone) return -ENODEV; 
+	
+	printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
+
+	/* find base address */
+	pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
+	offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
+	set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
+	reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
+		return -ENODEV;
+	}
+	base = *reg;	
+	if(!base){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
+		return -ENODEV;
+	}
+	
+	/* setup PMCC */
+	pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
+	offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
+	set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
+	reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
+		return -ENODEV;
+	}
+	reg[0] = 0x00000001;
+
+	/* setup MPCS */
+	pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
+	offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
+	set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
+	reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
+		return -ENODEV;
+	}
+	reg[0] = 0x00000001;
+
+	/* map in cyclone_timer */
+	pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
+	offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
+	set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
+	cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
+	if(!cyclone_timer){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
+		return -ENODEV;
+	}
+
+	/*quick test to make sure its ticking*/
+	for(i=0; i<3; i++){
+		u32 old = cyclone_timer[0];
+		int stall = 100;
+		while(stall--) barrier();
+		if(cyclone_timer[0] == old){
+			printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
+			cyclone_timer = 0;
+			return -ENODEV;
+		}
+	}
+
+	init_cpu_khz();
+
+	/* Everything looks good! */
+	return 0;
+}
+
+
+static void delay_cyclone(unsigned long loops)
+{
+	unsigned long bclock, now;
+	if(!cyclone_timer)
+		return;
+	bclock = cyclone_timer[0];
+	do {
+		rep_nop();
+		now = cyclone_timer[0];
+	} while ((now-bclock) < loops);
+}
+/************************************************************/
+
+/* cyclone timer_opts struct */
+static struct timer_opts timer_cyclone = {
+	.name = "cyclone",
+	.mark_offset = mark_offset_cyclone, 
+	.get_offset = get_offset_cyclone,
+	.monotonic_clock =	monotonic_clock_cyclone,
+	.delay = delay_cyclone,
+};
+
+struct init_timer_opts __initdata timer_cyclone_init = {
+	.init = init_cyclone,
+	.opts = &timer_cyclone,
+};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
new file mode 100644
index 00000000000..713134e7184
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -0,0 +1,191 @@
+/*
+ * This code largely moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/jiffies.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "io_ports.h"
+#include "mach_timer.h"
+#include <asm/hpet.h>
+
+static unsigned long hpet_usec_quotient;	/* convert hpet clks to usec */
+static unsigned long tsc_hpet_quotient;		/* convert tsc to hpet clks */
+static unsigned long hpet_last; 	/* hpet counter value at last tick*/
+static unsigned long last_tsc_low;	/* lsb 32 bits of Time Stamp Counter */
+static unsigned long last_tsc_high; 	/* msb 32 bits of Time Stamp Counter */
+static unsigned long long monotonic_base;
+static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ *		ns = cycles * (10^3 / cpu_mhz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ *  into a shift.
+ *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+static unsigned long long monotonic_clock_hpet(void)
+{
+	unsigned long long last_offset, this_offset, base;
+	unsigned seq;
+
+	/* atomically read monotonic base & last_offset */
+	do {
+		seq = read_seqbegin(&monotonic_lock);
+		last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+		base = monotonic_base;
+	} while (read_seqretry(&monotonic_lock, seq));
+
+	/* Read the Time Stamp Counter */
+	rdtscll(this_offset);
+
+	/* return the value in ns */
+	return base + cycles_2_ns(this_offset - last_offset);
+}
+
+static unsigned long get_offset_hpet(void)
+{
+	register unsigned long eax, edx;
+
+	eax = hpet_readl(HPET_COUNTER);
+	eax -= hpet_last;	/* hpet delta */
+
+	/*
+         * Time offset = (hpet delta) * ( usecs per HPET clock )
+	 *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
+	 *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
+	 *
+	 * Where,
+	 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
+	 *
+	 * Using a mull instead of a divl saves some cycles in critical path.
+         */
+	ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
+
+	/* our adjusted time offset in microseconds */
+	return edx;
+}
+
+static void mark_offset_hpet(void)
+{
+	unsigned long long this_offset, last_offset;
+	unsigned long offset;
+
+	write_seqlock(&monotonic_lock);
+	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	rdtsc(last_tsc_low, last_tsc_high);
+
+	offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+	if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
+		int lost_ticks = (offset - hpet_last) / hpet_tick;
+		jiffies_64 += lost_ticks;
+	}
+	hpet_last = offset;
+
+	/* update the monotonic base value */
+	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	monotonic_base += cycles_2_ns(this_offset - last_offset);
+	write_sequnlock(&monotonic_lock);
+}
+
+static void delay_hpet(unsigned long loops)
+{
+	unsigned long hpet_start, hpet_end;
+	unsigned long eax;
+
+	/* loops is the number of cpu cycles. Convert it to hpet clocks */
+	ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
+
+	hpet_start = hpet_readl(HPET_COUNTER);
+	do {
+		rep_nop();
+		hpet_end = hpet_readl(HPET_COUNTER);
+	} while ((hpet_end - hpet_start) < (loops));
+}
+
+static int __init init_hpet(char* override)
+{
+	unsigned long result, remain;
+
+	/* check clock override */
+	if (override[0] && strncmp(override,"hpet",4))
+		return -ENODEV;
+
+	if (!is_hpet_enabled())
+		return -ENODEV;
+
+	printk("Using HPET for gettimeofday\n");
+	if (cpu_has_tsc) {
+		unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
+		if (tsc_quotient) {
+			/* report CPU clock rate in Hz.
+			 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+			 * clock/second. Our precision is about 100 ppm.
+			 */
+			{	unsigned long eax=0, edx=1000;
+				ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
+						eax, edx);
+				printk("Detected %lu.%03lu MHz processor.\n",
+					cpu_khz / 1000, cpu_khz % 1000);
+			}
+			set_cyc2ns_scale(cpu_khz/1000);
+		}
+	}
+
+	/*
+	 * Math to calculate hpet to usec multiplier
+	 * Look for the comments at get_offset_hpet()
+	 */
+	ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
+	if (remain > (hpet_tick >> 1))
+		result++; /* rounding the result */
+	hpet_usec_quotient = result;
+
+	return 0;
+}
+
+/************************************************************/
+
+/* tsc timer_opts struct */
+static struct timer_opts timer_hpet = {
+	.name = 		"hpet",
+	.mark_offset =		mark_offset_hpet,
+	.get_offset =		get_offset_hpet,
+	.monotonic_clock =	monotonic_clock_hpet,
+	.delay = 		delay_hpet,
+};
+
+struct init_timer_opts __initdata timer_hpet_init = {
+	.init =	init_hpet,
+	.opts = &timer_hpet,
+};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
new file mode 100644
index 00000000000..4ea2f414dbb
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_none.c
@@ -0,0 +1,39 @@
+#include <linux/init.h>
+#include <asm/timer.h>
+
+static void mark_offset_none(void)
+{
+	/* nothing needed */
+}
+
+static unsigned long get_offset_none(void)
+{
+	return 0;
+}
+
+static unsigned long long monotonic_clock_none(void)
+{
+	return 0;
+}
+
+static void delay_none(unsigned long loops)
+{
+	int d0;
+	__asm__ __volatile__(
+		"\tjmp 1f\n"
+		".align 16\n"
+		"1:\tjmp 2f\n"
+		".align 16\n"
+		"2:\tdecl %0\n\tjns 2b"
+		:"=&a" (d0)
+		:"0" (loops));
+}
+
+/* none timer_opts struct */
+struct timer_opts timer_none = {
+	.name = 	"none",
+	.mark_offset =	mark_offset_none, 
+	.get_offset =	get_offset_none,
+	.monotonic_clock =	monotonic_clock_none,
+	.delay = delay_none,
+};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
new file mode 100644
index 00000000000..967d5453cd0
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -0,0 +1,206 @@
+/*
+ * This code largely moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/sysdev.h>
+#include <linux/timex.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/timer.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+#include <asm/arch_hooks.h>
+
+extern spinlock_t i8259A_lock;
+extern spinlock_t i8253_lock;
+#include "do_timer.h"
+#include "io_ports.h"
+
+static int count_p; /* counter in get_offset_pit() */
+
+static int __init init_pit(char* override)
+{
+ 	/* check clock override */
+ 	if (override[0] && strncmp(override,"pit",3))
+ 		printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n");
+ 
+	count_p = LATCH;
+	return 0;
+}
+
+static void mark_offset_pit(void)
+{
+	/* nothing needed */
+}
+
+static unsigned long long monotonic_clock_pit(void)
+{
+	return 0;
+}
+
+static void delay_pit(unsigned long loops)
+{
+	int d0;
+	__asm__ __volatile__(
+		"\tjmp 1f\n"
+		".align 16\n"
+		"1:\tjmp 2f\n"
+		".align 16\n"
+		"2:\tdecl %0\n\tjns 2b"
+		:"=&a" (d0)
+		:"0" (loops));
+}
+
+
+/* This function must be called with xtime_lock held.
+ * It was inspired by Steve McCanne's microtime-i386 for BSD.  -- jrs
+ * 
+ * However, the pc-audio speaker driver changes the divisor so that
+ * it gets interrupted rather more often - it loads 64 into the
+ * counter rather than 11932! This has an adverse impact on
+ * do_gettimeoffset() -- it stops working! What is also not
+ * good is that the interval that our timer function gets called
+ * is no longer 10.0002 ms, but 9.9767 ms. To get around this
+ * would require using a different timing source. Maybe someone
+ * could use the RTC - I know that this can interrupt at frequencies
+ * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
+ * it so that at startup, the timer code in sched.c would select
+ * using either the RTC or the 8253 timer. The decision would be
+ * based on whether there was any other device around that needed
+ * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
+ * and then do some jiggery to have a version of do_timer that 
+ * advanced the clock by 1/1024 s. Every time that reached over 1/100
+ * of a second, then do all the old code. If the time was kept correct
+ * then do_gettimeoffset could just return 0 - there is no low order
+ * divider that can be accessed.
+ *
+ * Ideally, you would be able to use the RTC for the speaker driver,
+ * but it appears that the speaker driver really needs interrupt more
+ * often than every 120 us or so.
+ *
+ * Anyway, this needs more thought....		pjsg (1993-08-28)
+ * 
+ * If you are really that interested, you should be reading
+ * comp.protocols.time.ntp!
+ */
+
+static unsigned long get_offset_pit(void)
+{
+	int count;
+	unsigned long flags;
+	static unsigned long jiffies_p = 0;
+
+	/*
+	 * cache volatile jiffies temporarily; we have xtime_lock. 
+	 */
+	unsigned long jiffies_t;
+
+	spin_lock_irqsave(&i8253_lock, flags);
+	/* timer count may underflow right here */
+	outb_p(0x00, PIT_MODE);	/* latch the count ASAP */
+
+	count = inb_p(PIT_CH0);	/* read the latched count */
+
+	/*
+	 * We do this guaranteed double memory access instead of a _p 
+	 * postfix in the previous port access. Wheee, hackady hack
+	 */
+ 	jiffies_t = jiffies;
+
+	count |= inb_p(PIT_CH0) << 8;
+	
+        /* VIA686a test code... reset the latch if count > max + 1 */
+        if (count > LATCH) {
+                outb_p(0x34, PIT_MODE);
+                outb_p(LATCH & 0xff, PIT_CH0);
+                outb(LATCH >> 8, PIT_CH0);
+                count = LATCH - 1;
+        }
+	
+	/*
+	 * avoiding timer inconsistencies (they are rare, but they happen)...
+	 * there are two kinds of problems that must be avoided here:
+	 *  1. the timer counter underflows
+	 *  2. hardware problem with the timer, not giving us continuous time,
+	 *     the counter does small "jumps" upwards on some Pentium systems,
+	 *     (see c't 95/10 page 335 for Neptun bug.)
+	 */
+
+	if( jiffies_t == jiffies_p ) {
+		if( count > count_p ) {
+			/* the nutcase */
+			count = do_timer_overflow(count);
+		}
+	} else
+		jiffies_p = jiffies_t;
+
+	count_p = count;
+
+	spin_unlock_irqrestore(&i8253_lock, flags);
+
+	count = ((LATCH-1) - count) * TICK_SIZE;
+	count = (count + LATCH/2) / LATCH;
+
+	return count;
+}
+
+
+/* tsc timer_opts struct */
+struct timer_opts timer_pit = {
+	.name = "pit",
+	.mark_offset = mark_offset_pit, 
+	.get_offset = get_offset_pit,
+	.monotonic_clock = monotonic_clock_pit,
+	.delay = delay_pit,
+};
+
+struct init_timer_opts __initdata timer_pit_init = {
+	.init = init_pit, 
+	.opts = &timer_pit,
+};
+
+void setup_pit_timer(void)
+{
+	extern spinlock_t i8253_lock;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8253_lock, flags);
+	outb_p(0x34,PIT_MODE);		/* binary, mode 2, LSB/MSB, ch 0 */
+	udelay(10);
+	outb_p(LATCH & 0xff , PIT_CH0);	/* LSB */
+	udelay(10);
+	outb(LATCH >> 8 , PIT_CH0);	/* MSB */
+	spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static int timer_resume(struct sys_device *dev)
+{
+	setup_pit_timer();
+	return 0;
+}
+
+static struct sysdev_class timer_sysclass = {
+	set_kset_name("timer_pit"),
+	.resume	= timer_resume,
+};
+
+static struct sys_device device_timer = {
+	.id	= 0,
+	.cls	= &timer_sysclass,
+};
+
+static int __init init_timer_sysfs(void)
+{
+	int error = sysdev_class_register(&timer_sysclass);
+	if (!error)
+		error = sysdev_register(&device_timer);
+	return error;
+}
+
+device_initcall(init_timer_sysfs);
+
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
new file mode 100644
index 00000000000..d77f22030fe
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -0,0 +1,258 @@
+/*
+ * (C) Dominik Brodowski <linux@brodo.de> 2003
+ *
+ * Driver to use the Power Management Timer (PMTMR) available in some
+ * southbridges as primary timing source for the Linux kernel.
+ *
+ * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
+ * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
+ *
+ * This file is licensed under the GPL v2.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <asm/types.h>
+#include <asm/timer.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+#include <asm/arch_hooks.h>
+
+#include <linux/timex.h>
+#include "mach_timer.h"
+
+/* Number of PMTMR ticks expected during calibration run */
+#define PMTMR_TICKS_PER_SEC 3579545
+#define PMTMR_EXPECTED_RATE \
+  ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
+
+
+/* The I/O port the PMTMR resides at.
+ * The location is detected during setup_arch(),
+ * in arch/i386/acpi/boot.c */
+u32 pmtmr_ioport = 0;
+
+
+/* value of the Power timer at last timer interrupt */
+static u32 offset_tick;
+static u32 offset_delay;
+
+static unsigned long long monotonic_base;
+static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
+
+#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+
+/*helper function to safely read acpi pm timesource*/
+static inline u32 read_pmtmr(void)
+{
+	u32 v1=0,v2=0,v3=0;
+	/* It has been reported that because of various broken
+	 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
+	 * source is not latched, so you must read it multiple
+	 * times to insure a safe value is read.
+	 */
+	do {
+		v1 = inl(pmtmr_ioport);
+		v2 = inl(pmtmr_ioport);
+		v3 = inl(pmtmr_ioport);
+	} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+			|| (v3 > v1 && v3 < v2));
+
+	/* mask the output to 24 bits */
+	return v2 & ACPI_PM_MASK;
+}
+
+
+/*
+ * Some boards have the PMTMR running way too fast. We check
+ * the PMTMR rate against PIT channel 2 to catch these cases.
+ */
+static int verify_pmtmr_rate(void)
+{
+	u32 value1, value2;
+	unsigned long count, delta;
+
+	mach_prepare_counter();
+	value1 = read_pmtmr();
+	mach_countup(&count);
+	value2 = read_pmtmr();
+	delta = (value2 - value1) & ACPI_PM_MASK;
+
+	/* Check that the PMTMR delta is within 5% of what we expect */
+	if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
+	    delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
+		printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+static int init_pmtmr(char* override)
+{
+	u32 value1, value2;
+	unsigned int i;
+
+ 	if (override[0] && strncmp(override,"pmtmr",5))
+		return -ENODEV;
+
+	if (!pmtmr_ioport)
+		return -ENODEV;
+
+	/* we use the TSC for delay_pmtmr, so make sure it exists */
+	if (!cpu_has_tsc)
+		return -ENODEV;
+
+	/* "verify" this timing source */
+	value1 = read_pmtmr();
+	for (i = 0; i < 10000; i++) {
+		value2 = read_pmtmr();
+		if (value2 == value1)
+			continue;
+		if (value2 > value1)
+			goto pm_good;
+		if ((value2 < value1) && ((value2) < 0xFFF))
+			goto pm_good;
+		printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
+		return -EINVAL;
+	}
+	printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
+	return -ENODEV;
+
+pm_good:
+	if (verify_pmtmr_rate() != 0)
+		return -ENODEV;
+
+	init_cpu_khz();
+	return 0;
+}
+
+static inline u32 cyc2us(u32 cycles)
+{
+	/* The Power Management Timer ticks at 3.579545 ticks per microsecond.
+	 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
+	 *
+	 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
+	 * easily be multiplied with 286 (=0x11E) without having to fear
+	 * u32 overflows.
+	 */
+	cycles *= 286;
+	return (cycles >> 10);
+}
+
+/*
+ * this gets called during each timer interrupt
+ *   - Called while holding the writer xtime_lock
+ */
+static void mark_offset_pmtmr(void)
+{
+	u32 lost, delta, last_offset;
+	static int first_run = 1;
+	last_offset = offset_tick;
+
+	write_seqlock(&monotonic_lock);
+
+	offset_tick = read_pmtmr();
+
+	/* calculate tick interval */
+	delta = (offset_tick - last_offset) & ACPI_PM_MASK;
+
+	/* convert to usecs */
+	delta = cyc2us(delta);
+
+	/* update the monotonic base value */
+	monotonic_base += delta * NSEC_PER_USEC;
+	write_sequnlock(&monotonic_lock);
+
+	/* convert to ticks */
+	delta += offset_delay;
+	lost = delta / (USEC_PER_SEC / HZ);
+	offset_delay = delta % (USEC_PER_SEC / HZ);
+
+
+	/* compensate for lost ticks */
+	if (lost >= 2)
+		jiffies_64 += lost - 1;
+
+	/* don't calculate delay for first run,
+	   or if we've got less then a tick */
+	if (first_run || (lost < 1)) {
+		first_run = 0;
+		offset_delay = 0;
+	}
+}
+
+
+static unsigned long long monotonic_clock_pmtmr(void)
+{
+	u32 last_offset, this_offset;
+	unsigned long long base, ret;
+	unsigned seq;
+
+
+	/* atomically read monotonic base & last_offset */
+	do {
+		seq = read_seqbegin(&monotonic_lock);
+		last_offset = offset_tick;
+		base = monotonic_base;
+	} while (read_seqretry(&monotonic_lock, seq));
+
+	/* Read the pmtmr */
+	this_offset =  read_pmtmr();
+
+	/* convert to nanoseconds */
+	ret = (this_offset - last_offset) & ACPI_PM_MASK;
+	ret = base + (cyc2us(ret) * NSEC_PER_USEC);
+	return ret;
+}
+
+static void delay_pmtmr(unsigned long loops)
+{
+	unsigned long bclock, now;
+
+	rdtscl(bclock);
+	do
+	{
+		rep_nop();
+		rdtscl(now);
+	} while ((now-bclock) < loops);
+}
+
+
+/*
+ * get the offset (in microseconds) from the last call to mark_offset()
+ *	- Called holding a reader xtime_lock
+ */
+static unsigned long get_offset_pmtmr(void)
+{
+	u32 now, offset, delta = 0;
+
+	offset = offset_tick;
+	now = read_pmtmr();
+	delta = (now - offset)&ACPI_PM_MASK;
+
+	return (unsigned long) offset_delay + cyc2us(delta);
+}
+
+
+/* acpi timer_opts struct */
+static struct timer_opts timer_pmtmr = {
+	.name			= "pmtmr",
+	.mark_offset		= mark_offset_pmtmr,
+	.get_offset		= get_offset_pmtmr,
+	.monotonic_clock 	= monotonic_clock_pmtmr,
+	.delay 			= delay_pmtmr,
+};
+
+struct init_timer_opts __initdata timer_pmtmr_init = {
+	.init = init_pmtmr,
+	.opts = &timer_pmtmr,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
new file mode 100644
index 00000000000..a685994e5c8
--- /dev/null
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -0,0 +1,560 @@
+/*
+ * This code largely moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ *
+ * 2004-06-25    Jesper Juhl
+ *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
+ *      failing to inline.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/cpufreq.h>
+#include <linux/string.h>
+#include <linux/jiffies.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+/* processor.h for distable_tsc flag */
+#include <asm/processor.h>
+
+#include "io_ports.h"
+#include "mach_timer.h"
+
+#include <asm/hpet.h>
+
+#ifdef CONFIG_HPET_TIMER
+static unsigned long hpet_usec_quotient;
+static unsigned long hpet_last;
+static struct timer_opts timer_tsc;
+#endif
+
+static inline void cpufreq_delayed_get(void);
+
+int tsc_disable __initdata = 0;
+
+extern spinlock_t i8253_lock;
+
+static int use_tsc;
+/* Number of usecs that the last interrupt was delayed */
+static int delay_at_last_interrupt;
+
+static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
+static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
+static unsigned long long monotonic_base;
+static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ *		ns = cycles * (10^3 / cpu_mhz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ *  into a shift.   
+ *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale; 
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+static int count2; /* counter for mark_offset_tsc() */
+
+/* Cached *multiplier* to convert TSC counts to microseconds.
+ * (see the equation below).
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+static unsigned long fast_gettimeoffset_quotient;
+
+static unsigned long get_offset_tsc(void)
+{
+	register unsigned long eax, edx;
+
+	/* Read the Time Stamp Counter */
+
+	rdtsc(eax,edx);
+
+	/* .. relative to previous jiffy (32 bits is enough) */
+	eax -= last_tsc_low;	/* tsc_low delta */
+
+	/*
+         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+         *             = (tsc_low delta) * (usecs_per_clock)
+         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+	 *
+	 * Using a mull instead of a divl saves up to 31 clock cycles
+	 * in the critical path.
+         */
+
+	__asm__("mull %2"
+		:"=a" (eax), "=d" (edx)
+		:"rm" (fast_gettimeoffset_quotient),
+		 "0" (eax));
+
+	/* our adjusted time offset in microseconds */
+	return delay_at_last_interrupt + edx;
+}
+
+static unsigned long long monotonic_clock_tsc(void)
+{
+	unsigned long long last_offset, this_offset, base;
+	unsigned seq;
+	
+	/* atomically read monotonic base & last_offset */
+	do {
+		seq = read_seqbegin(&monotonic_lock);
+		last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+		base = monotonic_base;
+	} while (read_seqretry(&monotonic_lock, seq));
+
+	/* Read the Time Stamp Counter */
+	rdtscll(this_offset);
+
+	/* return the value in ns */
+	return base + cycles_2_ns(this_offset - last_offset);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	unsigned long long this_offset;
+
+	/*
+	 * In the NUMA case we dont use the TSC as they are not
+	 * synchronized across all CPUs.
+	 */
+#ifndef CONFIG_NUMA
+	if (!use_tsc)
+#endif
+		/* no locking but a rare wrong value is not a big deal */
+		return jiffies_64 * (1000000000 / HZ);
+
+	/* Read the Time Stamp Counter */
+	rdtscll(this_offset);
+
+	/* return the value in ns */
+	return cycles_2_ns(this_offset);
+}
+
+static void delay_tsc(unsigned long loops)
+{
+	unsigned long bclock, now;
+	
+	rdtscl(bclock);
+	do
+	{
+		rep_nop();
+		rdtscl(now);
+	} while ((now-bclock) < loops);
+}
+
+#ifdef CONFIG_HPET_TIMER
+static void mark_offset_tsc_hpet(void)
+{
+	unsigned long long this_offset, last_offset;
+ 	unsigned long offset, temp, hpet_current;
+
+	write_seqlock(&monotonic_lock);
+	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	/*
+	 * It is important that these two operations happen almost at
+	 * the same time. We do the RDTSC stuff first, since it's
+	 * faster. To avoid any inconsistencies, we need interrupts
+	 * disabled locally.
+	 */
+	/*
+	 * Interrupts are just disabled locally since the timer irq
+	 * has the SA_INTERRUPT flag set. -arca
+	 */
+	/* read Pentium cycle counter */
+
+	hpet_current = hpet_readl(HPET_COUNTER);
+	rdtsc(last_tsc_low, last_tsc_high);
+
+	/* lost tick compensation */
+	offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+	if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
+		int lost_ticks = (offset - hpet_last) / hpet_tick;
+		jiffies_64 += lost_ticks;
+	}
+	hpet_last = hpet_current;
+
+	/* update the monotonic base value */
+	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	monotonic_base += cycles_2_ns(this_offset - last_offset);
+	write_sequnlock(&monotonic_lock);
+
+	/* calculate delay_at_last_interrupt */
+	/*
+	 * Time offset = (hpet delta) * ( usecs per HPET clock )
+	 *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
+	 *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
+	 * Where,
+	 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
+	 */
+	delay_at_last_interrupt = hpet_current - offset;
+	ASM_MUL64_REG(temp, delay_at_last_interrupt,
+			hpet_usec_quotient, delay_at_last_interrupt);
+}
+#endif
+
+
+#ifdef CONFIG_CPU_FREQ
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+	unsigned int cpu;
+	for_each_online_cpu(cpu) {
+		cpufreq_get(cpu);
+	}
+	cpufreq_delayed_issched = 0;
+}
+
+/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static inline void cpufreq_delayed_get(void) 
+{
+	if (cpufreq_init && !cpufreq_delayed_issched) {
+		cpufreq_delayed_issched = 1;
+		printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+		schedule_work(&cpufreq_delayed_get_work);
+	}
+}
+
+/* If the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
+ */
+
+static unsigned int  ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+#ifndef CONFIG_SMP
+static unsigned long fast_gettimeoffset_ref = 0;
+static unsigned long cpu_khz_ref = 0;
+#endif
+
+static int
+time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+		       void *data)
+{
+	struct cpufreq_freqs *freq = data;
+
+	if (val != CPUFREQ_RESUMECHANGE)
+		write_seqlock_irq(&xtime_lock);
+	if (!ref_freq) {
+		ref_freq = freq->old;
+		loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+#ifndef CONFIG_SMP
+		fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
+		cpu_khz_ref = cpu_khz;
+#endif
+	}
+
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+	    (val == CPUFREQ_RESUMECHANGE)) {
+		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+			cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+#ifndef CONFIG_SMP
+		if (cpu_khz)
+			cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+		if (use_tsc) {
+			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
+				fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
+				set_cyc2ns_scale(cpu_khz/1000);
+			}
+		}
+#endif
+	}
+
+	if (val != CPUFREQ_RESUMECHANGE)
+		write_sequnlock_irq(&xtime_lock);
+
+	return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+	.notifier_call	= time_cpufreq_notifier
+};
+
+
+static int __init cpufreq_tsc(void)
+{
+	int ret;
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+	ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
+					CPUFREQ_TRANSITION_NOTIFIER);
+	if (!ret)
+		cpufreq_init = 1;
+	return ret;
+}
+core_initcall(cpufreq_tsc);
+
+#else /* CONFIG_CPU_FREQ */
+static inline void cpufreq_delayed_get(void) { return; }
+#endif 
+
+static void mark_offset_tsc(void)
+{
+	unsigned long lost,delay;
+	unsigned long delta = last_tsc_low;
+	int count;
+	int countmp;
+	static int count1 = 0;
+	unsigned long long this_offset, last_offset;
+	static int lost_count = 0;
+
+	write_seqlock(&monotonic_lock);
+	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	/*
+	 * It is important that these two operations happen almost at
+	 * the same time. We do the RDTSC stuff first, since it's
+	 * faster. To avoid any inconsistencies, we need interrupts
+	 * disabled locally.
+	 */
+
+	/*
+	 * Interrupts are just disabled locally since the timer irq
+	 * has the SA_INTERRUPT flag set. -arca
+	 */
+
+	/* read Pentium cycle counter */
+
+	rdtsc(last_tsc_low, last_tsc_high);
+
+	spin_lock(&i8253_lock);
+	outb_p(0x00, PIT_MODE);     /* latch the count ASAP */
+
+	count = inb_p(PIT_CH0);    /* read the latched count */
+	count |= inb(PIT_CH0) << 8;
+
+	/*
+	 * VIA686a test code... reset the latch if count > max + 1
+	 * from timer_pit.c - cjb
+	 */
+	if (count > LATCH) {
+		outb_p(0x34, PIT_MODE);
+		outb_p(LATCH & 0xff, PIT_CH0);
+		outb(LATCH >> 8, PIT_CH0);
+		count = LATCH - 1;
+	}
+
+	spin_unlock(&i8253_lock);
+
+	if (pit_latch_buggy) {
+		/* get center value of last 3 time lutch */
+		if ((count2 >= count && count >= count1)
+		    || (count1 >= count && count >= count2)) {
+			count2 = count1; count1 = count;
+		} else if ((count1 >= count2 && count2 >= count)
+			   || (count >= count2 && count2 >= count1)) {
+			countmp = count;count = count2;
+			count2 = count1;count1 = countmp;
+		} else {
+			count2 = count1; count1 = count; count = count1;
+		}
+	}
+
+	/* lost tick compensation */
+	delta = last_tsc_low - delta;
+	{
+		register unsigned long eax, edx;
+		eax = delta;
+		__asm__("mull %2"
+		:"=a" (eax), "=d" (edx)
+		:"rm" (fast_gettimeoffset_quotient),
+		 "0" (eax));
+		delta = edx;
+	}
+	delta += delay_at_last_interrupt;
+	lost = delta/(1000000/HZ);
+	delay = delta%(1000000/HZ);
+	if (lost >= 2) {
+		jiffies_64 += lost-1;
+
+		/* sanity check to ensure we're not always losing ticks */
+		if (lost_count++ > 100) {
+			printk(KERN_WARNING "Losing too many ticks!\n");
+			printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
+			printk(KERN_WARNING "Possible reasons for this are:\n");
+			printk(KERN_WARNING "  You're running with Speedstep,\n");
+			printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
+			printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
+			printk(KERN_WARNING "Falling back to a sane timesource now.\n");
+
+			clock_fallback();
+		}
+		/* ... but give the TSC a fair chance */
+		if (lost_count > 25)
+			cpufreq_delayed_get();
+	} else
+		lost_count = 0;
+	/* update the monotonic base value */
+	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+	monotonic_base += cycles_2_ns(this_offset - last_offset);
+	write_sequnlock(&monotonic_lock);
+
+	/* calculate delay_at_last_interrupt */
+	count = ((LATCH-1) - count) * TICK_SIZE;
+	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+	/* catch corner case where tick rollover occured
+	 * between tsc and pit reads (as noted when
+	 * usec delta is > 90% # of usecs/tick)
+	 */
+	if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
+		jiffies_64++;
+}
+
+static int __init init_tsc(char* override)
+{
+
+	/* check clock override */
+	if (override[0] && strncmp(override,"tsc",3)) {
+#ifdef CONFIG_HPET_TIMER
+		if (is_hpet_enabled()) {
+			printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
+		} else
+#endif
+		{
+			return -ENODEV;
+		}
+	}
+
+	/*
+	 * If we have APM enabled or the CPU clock speed is variable
+	 * (CPU stops clock on HLT or slows clock to save power)
+	 * then the TSC timestamps may diverge by up to 1 jiffy from
+	 * 'real time' but nothing will break.
+	 * The most frequent case is that the CPU is "woken" from a halt
+	 * state by the timer interrupt itself, so we get 0 error. In the
+	 * rare cases where a driver would "wake" the CPU and request a
+	 * timestamp, the maximum error is < 1 jiffy. But timestamps are
+	 * still perfectly ordered.
+	 * Note that the TSC counter will be reset if APM suspends
+	 * to disk; this won't break the kernel, though, 'cuz we're
+	 * smart.  See arch/i386/kernel/apm.c.
+	 */
+ 	/*
+ 	 *	Firstly we have to do a CPU check for chips with
+ 	 * 	a potentially buggy TSC. At this point we haven't run
+ 	 *	the ident/bugs checks so we must run this hook as it
+ 	 *	may turn off the TSC flag.
+ 	 *
+ 	 *	NOTE: this doesn't yet handle SMP 486 machines where only
+ 	 *	some CPU's have a TSC. Thats never worked and nobody has
+ 	 *	moaned if you have the only one in the world - you fix it!
+ 	 */
+
+	count2 = LATCH; /* initialize counter for mark_offset_tsc() */
+
+	if (cpu_has_tsc) {
+		unsigned long tsc_quotient;
+#ifdef CONFIG_HPET_TIMER
+		if (is_hpet_enabled()){
+			unsigned long result, remain;
+			printk("Using TSC for gettimeofday\n");
+			tsc_quotient = calibrate_tsc_hpet(NULL);
+			timer_tsc.mark_offset = &mark_offset_tsc_hpet;
+			/*
+			 * Math to calculate hpet to usec multiplier
+			 * Look for the comments at get_offset_tsc_hpet()
+			 */
+			ASM_DIV64_REG(result, remain, hpet_tick,
+					0, KERNEL_TICK_USEC);
+			if (remain > (hpet_tick >> 1))
+				result++; /* rounding the result */
+
+			hpet_usec_quotient = result;
+		} else
+#endif
+		{
+			tsc_quotient = calibrate_tsc();
+		}
+
+		if (tsc_quotient) {
+			fast_gettimeoffset_quotient = tsc_quotient;
+			use_tsc = 1;
+			/*
+			 *	We could be more selective here I suspect
+			 *	and just enable this for the next intel chips ?
+			 */
+			/* report CPU clock rate in Hz.
+			 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+			 * clock/second. Our precision is about 100 ppm.
+			 */
+			{	unsigned long eax=0, edx=1000;
+				__asm__("divl %2"
+		       		:"=a" (cpu_khz), "=d" (edx)
+        	       		:"r" (tsc_quotient),
+	                	"0" (eax), "1" (edx));
+				printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
+			}
+			set_cyc2ns_scale(cpu_khz/1000);
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+#ifndef CONFIG_X86_TSC
+/* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c */
+static int __init tsc_setup(char *str)
+{
+	tsc_disable = 1;
+	return 1;
+}
+#else
+static int __init tsc_setup(char *str)
+{
+	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+				"cannot disable TSC.\n");
+	return 1;
+}
+#endif
+__setup("notsc", tsc_setup);
+
+
+
+/************************************************************/
+
+/* tsc timer_opts struct */
+static struct timer_opts timer_tsc = {
+	.name = "tsc",
+	.mark_offset = mark_offset_tsc, 
+	.get_offset = get_offset_tsc,
+	.monotonic_clock = monotonic_clock_tsc,
+	.delay = delay_tsc,
+};
+
+struct init_timer_opts __initdata timer_tsc_init = {
+	.init = init_tsc,
+	.opts = &timer_tsc,
+};
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
new file mode 100644
index 00000000000..fcce0e61b0e
--- /dev/null
+++ b/arch/i386/kernel/trampoline.S
@@ -0,0 +1,80 @@
+/*
+ *
+ *	Trampoline.S	Derived from Setup.S by Linus Torvalds
+ *
+ *	4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ *	This is only used for booting secondary CPUs in SMP machine
+ *
+ *	Entry: CS:IP point to the start of our code, we are 
+ *	in real mode with no stack, but the rest of the 
+ *	trampoline page to make our stack and everything else
+ *	is a mystery.
+ *
+ *	In fact we don't actually need a stack so we don't
+ *	set one up.
+ *
+ *	We jump into the boot/compressed/head.S code. So you'd
+ *	better be running a compressed kernel image or you
+ *	won't get very far.
+ *
+ *	On entry to trampoline_data, the processor is in real mode
+ *	with 16-bit addressing and 16-bit data.  CS has some value
+ *	and IP is zero.  Thus, data addresses need to be absolute
+ *	(no relocation) and are taken with regard to r_base.
+ *
+ *	If you work on this file, check the object module with
+ *	objdump --reloc to make sure there are no relocation
+ *	entries except for:
+ *
+ *	TYPE              VALUE
+ *	R_386_32          startup_32_smp
+ *	R_386_32          boot_gdt_table
+ */
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+	wbinvd			# Needed for NUMA-Q should be harmless for others
+	mov	%cs, %ax	# Code and data in the same place
+	mov	%ax, %ds
+
+	cli			# We should be safe anyway
+
+	movl	$0xA5A5A5A5, trampoline_data - r_base
+				# write marker for master knows we're running
+
+	/* GDT tables in non default location kernel can be beyond 16MB and
+	 * lgdt will not be able to load the address as in real mode default
+	 * operand size is 16bit. Use lgdtl instead to force operand size
+	 * to 32 bit.
+	 */
+
+	lidtl	boot_idt - r_base	# load idt with 0, 0
+	lgdtl	boot_gdt - r_base	# load gdt with whatever is appropriate
+
+	xor	%ax, %ax
+	inc	%ax		# protected mode (PE) bit
+	lmsw	%ax		# into protected mode
+	# flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S
+	ljmpl	$__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET)
+
+	# These need to be in the same 64K segment as the above;
+	# hence we don't use the boot_gdt_descr defined in head.S
+boot_gdt:
+	.word	__BOOT_DS + 7			# gdt limit
+	.long	boot_gdt_table-__PAGE_OFFSET	# gdt base
+
+boot_idt:
+	.word	0				# idt limit = 0
+	.long	0				# idt base = 0L
+
+.globl trampoline_end
+trampoline_end:
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
new file mode 100644
index 00000000000..6c0e383915b
--- /dev/null
+++ b/arch/i386/kernel/traps.c
@@ -0,0 +1,1084 @@
+/*
+ *  linux/arch/i386/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kallsyms.h>
+#include <linux/ptrace.h>
+#include <linux/utsname.h>
+#include <linux/kprobes.h>
+
+#ifdef CONFIG_EISA
+#include <linux/ioport.h>
+#include <linux/eisa.h>
+#endif
+
+#ifdef CONFIG_MCA
+#include <linux/mca.h>
+#endif
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/nmi.h>
+
+#include <asm/smp.h>
+#include <asm/arch_hooks.h>
+#include <asm/kdebug.h>
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include "mach_traps.h"
+
+asmlinkage int system_call(void);
+
+struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+		{ 0, 0 }, { 0, 0 } };
+
+/* Do we ignore FPU interrupts ? */
+char ignore_fpu_irq = 0;
+
+/*
+ * The IDT has to be page-aligned to simplify the Pentium
+ * F0 0F bug workaround.. We have a special link segment
+ * for this.
+ */
+struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+static int kstack_depth_to_print = 24;
+struct notifier_block *i386die_chain;
+static DEFINE_SPINLOCK(die_notifier_lock);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	int err = 0;
+	unsigned long flags;
+	spin_lock_irqsave(&die_notifier_lock, flags);
+	err = notifier_chain_register(&i386die_chain, nb);
+	spin_unlock_irqrestore(&die_notifier_lock, flags);
+	return err;
+}
+
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	return	p > (void *)tinfo &&
+		p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+static inline unsigned long print_context_stack(struct thread_info *tinfo,
+				unsigned long *stack, unsigned long ebp)
+{
+	unsigned long addr;
+
+#ifdef	CONFIG_FRAME_POINTER
+	while (valid_stack_ptr(tinfo, (void *)ebp)) {
+		addr = *(unsigned long *)(ebp + 4);
+		printk(" [<%08lx>] ", addr);
+		print_symbol("%s", addr);
+		printk("\n");
+		ebp = *(unsigned long *)ebp;
+	}
+#else
+	while (valid_stack_ptr(tinfo, stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			printk(" [<%08lx>]", addr);
+			print_symbol(" %s", addr);
+			printk("\n");
+		}
+	}
+#endif
+	return ebp;
+}
+
+void show_trace(struct task_struct *task, unsigned long * stack)
+{
+	unsigned long ebp;
+
+	if (!task)
+		task = current;
+
+	if (task == current) {
+		/* Grab ebp right from our regs */
+		asm ("movl %%ebp, %0" : "=r" (ebp) : );
+	} else {
+		/* ebp is the last reg pushed by switch_to */
+		ebp = *(unsigned long *) task->thread.esp;
+	}
+
+	while (1) {
+		struct thread_info *context;
+		context = (struct thread_info *)
+			((unsigned long)stack & (~(THREAD_SIZE - 1)));
+		ebp = print_context_stack(context, stack, ebp);
+		stack = (unsigned long*)context->previous_esp;
+		if (!stack)
+			break;
+		printk(" =======================\n");
+	}
+}
+
+void show_stack(struct task_struct *task, unsigned long *esp)
+{
+	unsigned long *stack;
+	int i;
+
+	if (esp == NULL) {
+		if (task)
+			esp = (unsigned long*)task->thread.esp;
+		else
+			esp = (unsigned long *)&esp;
+	}
+
+	stack = esp;
+	for(i = 0; i < kstack_depth_to_print; i++) {
+		if (kstack_end(stack))
+			break;
+		if (i && ((i % 8) == 0))
+			printk("\n       ");
+		printk("%08lx ", *stack++);
+	}
+	printk("\nCall Trace:\n");
+	show_trace(task, esp);
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+	unsigned long stack;
+
+	show_trace(current, &stack);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void show_registers(struct pt_regs *regs)
+{
+	int i;
+	int in_kernel = 1;
+	unsigned long esp;
+	unsigned short ss;
+
+	esp = (unsigned long) (&regs->esp);
+	ss = __KERNEL_DS;
+	if (regs->xcs & 3) {
+		in_kernel = 0;
+		esp = regs->esp;
+		ss = regs->xss & 0xffff;
+	}
+	print_modules();
+	printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\nEFLAGS: %08lx"
+			"   (%s) \n",
+		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+		print_tainted(), regs->eflags, system_utsname.release);
+	print_symbol("EIP is at %s\n", regs->eip);
+	printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk("ds: %04x   es: %04x   ss: %04x\n",
+		regs->xds & 0xffff, regs->xes & 0xffff, ss);
+	printk("Process %s (pid: %d, threadinfo=%p task=%p)",
+		current->comm, current->pid, current_thread_info(), current);
+	/*
+	 * When in-kernel, we also print out the stack and code at the
+	 * time of the fault..
+	 */
+	if (in_kernel) {
+		u8 *eip;
+
+		printk("\nStack: ");
+		show_stack(NULL, (unsigned long*)esp);
+
+		printk("Code: ");
+
+		eip = (u8 *)regs->eip - 43;
+		for (i = 0; i < 64; i++, eip++) {
+			unsigned char c;
+
+			if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
+				printk(" Bad EIP value.");
+				break;
+			}
+			if (eip == (u8 *)regs->eip)
+				printk("<%02x> ", c);
+			else
+				printk("%02x ", c);
+		}
+	}
+	printk("\n");
+}	
+
+static void handle_BUG(struct pt_regs *regs)
+{
+	unsigned short ud2;
+	unsigned short line;
+	char *file;
+	char c;
+	unsigned long eip;
+
+	if (regs->xcs & 3)
+		goto no_bug;		/* Not in kernel */
+
+	eip = regs->eip;
+
+	if (eip < PAGE_OFFSET)
+		goto no_bug;
+	if (__get_user(ud2, (unsigned short *)eip))
+		goto no_bug;
+	if (ud2 != 0x0b0f)
+		goto no_bug;
+	if (__get_user(line, (unsigned short *)(eip + 2)))
+		goto bug;
+	if (__get_user(file, (char **)(eip + 4)) ||
+		(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+		file = "<bad filename>";
+
+	printk("------------[ cut here ]------------\n");
+	printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
+
+no_bug:
+	return;
+
+	/* Here we know it was a BUG but file-n-line is unavailable */
+bug:
+	printk("Kernel BUG\n");
+}
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+	static struct {
+		spinlock_t lock;
+		u32 lock_owner;
+		int lock_owner_depth;
+	} die = {
+		.lock =			SPIN_LOCK_UNLOCKED,
+		.lock_owner =		-1,
+		.lock_owner_depth =	0
+	};
+	static int die_counter;
+
+	if (die.lock_owner != _smp_processor_id()) {
+		console_verbose();
+		spin_lock_irq(&die.lock);
+		die.lock_owner = smp_processor_id();
+		die.lock_owner_depth = 0;
+		bust_spinlocks(1);
+	}
+
+	if (++die.lock_owner_depth < 3) {
+		int nl = 0;
+		handle_BUG(regs);
+		printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+		printk("PREEMPT ");
+		nl = 1;
+#endif
+#ifdef CONFIG_SMP
+		printk("SMP ");
+		nl = 1;
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		printk("DEBUG_PAGEALLOC");
+		nl = 1;
+#endif
+		if (nl)
+			printk("\n");
+	notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
+		show_registers(regs);
+  	} else
+		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
+	bust_spinlocks(0);
+	die.lock_owner = -1;
+	spin_unlock_irq(&die.lock);
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops) {
+		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
+		ssleep(5);
+		panic("Fatal exception");
+	}
+	do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+	if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs))
+		die(str, regs, err);
+}
+
+static void do_trap(int trapnr, int signr, char *str, int vm86,
+			   struct pt_regs * regs, long error_code, siginfo_t *info)
+{
+	if (regs->eflags & VM_MASK) {
+		if (vm86)
+			goto vm86_trap;
+		goto trap_signal;
+	}
+
+	if (!(regs->xcs & 3))
+		goto kernel_trap;
+
+	trap_signal: {
+		struct task_struct *tsk = current;
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+		if (info)
+			force_sig_info(signr, info, tsk);
+		else
+			force_sig(signr, tsk);
+		return;
+	}
+
+	kernel_trap: {
+		if (!fixup_exception(regs))
+			die(str, regs, error_code);
+		return;
+	}
+
+	vm86_trap: {
+		int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
+		if (ret) goto trap_signal;
+		return;
+	}
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+						== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	siginfo_t info; \
+	info.si_signo = signr; \
+	info.si_errno = 0; \
+	info.si_code = sicode; \
+	info.si_addr = (void __user *)siaddr; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+						== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+}
+
+#define DO_VM86_ERROR(trapnr, signr, str, name) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+						== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+}
+
+#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	siginfo_t info; \
+	info.si_signo = signr; \
+	info.si_errno = 0; \
+	info.si_code = sicode; \
+	info.si_addr = (void __user *)siaddr; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+						== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
+}
+
+DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+#ifndef CONFIG_KPROBES
+DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
+#endif
+DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+
+fastcall void do_general_protection(struct pt_regs * regs, long error_code)
+{
+	int cpu = get_cpu();
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct thread_struct *thread = &current->thread;
+
+	/*
+	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+	 * invalid offset set (the LAZY one) and the faulting thread has
+	 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
+	 * and we set the offset field correctly. Then we let the CPU to
+	 * restart the faulting instruction.
+	 */
+	if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+	    thread->io_bitmap_ptr) {
+		memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
+		       thread->io_bitmap_max);
+		/*
+		 * If the previously set map was extending to higher ports
+		 * than the current one, pad extra space with 0xff (no access).
+		 */
+		if (thread->io_bitmap_max < tss->io_bitmap_max)
+			memset((char *) tss->io_bitmap +
+				thread->io_bitmap_max, 0xff,
+				tss->io_bitmap_max - thread->io_bitmap_max);
+		tss->io_bitmap_max = thread->io_bitmap_max;
+		tss->io_bitmap_base = IO_BITMAP_OFFSET;
+		put_cpu();
+		return;
+	}
+	put_cpu();
+
+	if (regs->eflags & VM_MASK)
+		goto gp_in_vm86;
+
+	if (!(regs->xcs & 3))
+		goto gp_in_kernel;
+
+	current->thread.error_code = error_code;
+	current->thread.trap_no = 13;
+	force_sig(SIGSEGV, current);
+	return;
+
+gp_in_vm86:
+	local_irq_enable();
+	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+	return;
+
+gp_in_kernel:
+	if (!fixup_exception(regs)) {
+		if (notify_die(DIE_GPF, "general protection fault", regs,
+				error_code, 13, SIGSEGV) == NOTIFY_STOP)
+			return;
+		die("general protection fault", regs, error_code);
+	}
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+	printk("You probably have a hardware problem with your RAM chips\n");
+
+	/* Clear and disable the memory parity error line. */
+	clear_mem_error(reason);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+	unsigned long i;
+
+	printk("NMI: IOCK error (debug interrupt?)\n");
+	show_registers(regs);
+
+	/* Re-enable the IOCK line, wait for a few seconds */
+	reason = (reason & 0xf) | 8;
+	outb(reason, 0x61);
+	i = 2000;
+	while (--i) udelay(1000);
+	reason &= ~8;
+	outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+#ifdef CONFIG_MCA
+	/* Might actually be able to figure out what the guilty party
+	* is. */
+	if( MCA_bus ) {
+		mca_handle_nmi();
+		return;
+	}
+#endif
+	printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		reason, smp_processor_id());
+	printk("Dazed and confused, but trying to continue\n");
+	printk("Do you have a strange power saving mode enabled?\n");
+}
+
+static DEFINE_SPINLOCK(nmi_print_lock);
+
+void die_nmi (struct pt_regs *regs, const char *msg)
+{
+	spin_lock(&nmi_print_lock);
+	/*
+	* We are in trouble anyway, lets at least try
+	* to get a message out.
+	*/
+	bust_spinlocks(1);
+	printk(msg);
+	printk(" on CPU%d, eip %08lx, registers:\n",
+		smp_processor_id(), regs->eip);
+	show_registers(regs);
+	printk("console shuts up ...\n");
+	console_silent();
+	spin_unlock(&nmi_print_lock);
+	bust_spinlocks(0);
+	do_exit(SIGSEGV);
+}
+
+static void default_do_nmi(struct pt_regs * regs)
+{
+	unsigned char reason = 0;
+
+	/* Only the BSP gets external NMIs from the system.  */
+	if (!smp_processor_id())
+		reason = get_nmi_reason();
+ 
+	if (!(reason & 0xc0)) {
+		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+							== NOTIFY_STOP)
+			return;
+#ifdef CONFIG_X86_LOCAL_APIC
+		/*
+		 * Ok, so this is none of the documented NMI sources,
+		 * so it must be the NMI watchdog.
+		 */
+		if (nmi_watchdog) {
+			nmi_watchdog_tick(regs);
+			return;
+		}
+#endif
+		unknown_nmi_error(reason, regs);
+		return;
+	}
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+		return;
+	if (reason & 0x80)
+		mem_parity_error(reason, regs);
+	if (reason & 0x40)
+		io_check_error(reason, regs);
+	/*
+	 * Reassert NMI in case it became active meanwhile
+	 * as it's edge-triggered.
+	 */
+	reassert_nmi();
+}
+
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return 0;
+}
+ 
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+ 
+fastcall void do_nmi(struct pt_regs * regs, long error_code)
+{
+	int cpu;
+
+	nmi_enter();
+
+	cpu = smp_processor_id();
+	++nmi_count(cpu);
+
+	if (!nmi_callback(regs, cpu))
+		default_do_nmi(regs);
+
+	nmi_exit();
+}
+
+void set_nmi_callback(nmi_callback_t callback)
+{
+	nmi_callback = callback;
+}
+
+void unset_nmi_callback(void)
+{
+	nmi_callback = dummy_nmi_callback;
+}
+
+#ifdef CONFIG_KPROBES
+fastcall int do_int3(struct pt_regs *regs, long error_code)
+{
+	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+			== NOTIFY_STOP)
+		return 1;
+	/* This is an interrupt gate, because kprobes wants interrupts
+	disabled.  Normal trap handlers don't. */
+	restore_interrupts(regs);
+	do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+	return 0;
+}
+#endif
+
+/*
+ * Our handling of the processor debug registers is non-trivial.
+ * We do not clear them on entry and exit from the kernel. Therefore
+ * it is possible to get a watchpoint trap here from inside the kernel.
+ * However, the code in ./ptrace.c has ensured that the user can
+ * only set watchpoints on userspace addresses. Therefore the in-kernel
+ * watchpoint trap can only occur in code which is reading/writing
+ * from user space. Such code must not hold kernel locks (since it
+ * can equally take a page fault), therefore it is safe to call
+ * force_sig_info even though that claims and releases locks.
+ * 
+ * Code in ./signal.c ensures that the debug control register
+ * is restored before we deliver any signal, and therefore that
+ * user code runs with the correct debug control register even though
+ * we clear it here.
+ *
+ * Being careful here means that we don't have to be as careful in a
+ * lot of more complicated places (task switching can be a bit lazy
+ * about restoring all the debug state, and ptrace doesn't have to
+ * find every occurrence of the TF bit that could be saved away even
+ * by user code)
+ */
+fastcall void do_debug(struct pt_regs * regs, long error_code)
+{
+	unsigned int condition;
+	struct task_struct *tsk = current;
+
+	__asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+					SIGTRAP) == NOTIFY_STOP)
+		return;
+	/* It's safe to allow irq's after DR6 has been saved */
+	if (regs->eflags & X86_EFLAGS_IF)
+		local_irq_enable();
+
+	/* Mask out spurious debug traps due to lazy DR7 setting */
+	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+		if (!tsk->thread.debugreg[7])
+			goto clear_dr7;
+	}
+
+	if (regs->eflags & VM_MASK)
+		goto debug_vm86;
+
+	/* Save debug status register where ptrace can see it */
+	tsk->thread.debugreg[6] = condition;
+
+	/*
+	 * Single-stepping through TF: make sure we ignore any events in
+	 * kernel space (but re-enable TF when returning to user mode).
+	 */
+	if (condition & DR_STEP) {
+		/*
+		 * We already checked v86 mode above, so we can
+		 * check for kernel mode by just checking the CPL
+		 * of CS.
+		 */
+		if ((regs->xcs & 3) == 0)
+			goto clear_TF_reenable;
+	}
+
+	/* Ok, finally something we can handle */
+	send_sigtrap(tsk, regs, error_code);
+
+	/* Disable additional traps. They'll be re-enabled when
+	 * the signal is delivered.
+	 */
+clear_dr7:
+	__asm__("movl %0,%%db7"
+		: /* no output */
+		: "r" (0));
+	return;
+
+debug_vm86:
+	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
+	return;
+
+clear_TF_reenable:
+	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+	regs->eflags &= ~TF_MASK;
+	return;
+}
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+void math_error(void __user *eip)
+{
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short cwd, swd;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 16;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = eip;
+	/*
+	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
+	 * status.  0x3f is the exception bits in these regs, 0x200 is the
+	 * C1 reg you need in case of a stack fault, 0x040 is the stack
+	 * fault bit.  We should only be taking one exception at a time,
+	 * so if this combination doesn't produce any single exception,
+	 * then we have a bad program that isn't syncronizing its FPU usage
+	 * and it will suffer the consequences since we won't be able to
+	 * fully reproduce the context of the exception
+	 */
+	cwd = get_fpu_cwd(task);
+	swd = get_fpu_swd(task);
+	switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+		case 0x041: /* Stack Fault */
+		case 0x241: /* Stack Fault | Direction */
+			info.si_code = FPE_FLTINV;
+			/* Should we clear the SF or let user space do it ???? */
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+	ignore_fpu_irq = 1;
+	math_error((void __user *)regs->eip);
+}
+
+static void simd_math_error(void __user *eip)
+{
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short mxcsr;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 19;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = eip;
+	/*
+	 * The SIMD FPU exceptions are handled a little differently, as there
+	 * is only a single status/control register.  Thus, to determine which
+	 * unmasked exception was caught we must mask the exception mask bits
+	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+	 */
+	mxcsr = get_fpu_mxcsr(task);
+	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+			info.si_code = FPE_FLTINV;
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
+					  long error_code)
+{
+	if (cpu_has_xmm) {
+		/* Handle SIMD FPU exceptions on PIII+ processors. */
+		ignore_fpu_irq = 1;
+		simd_math_error((void __user *)regs->eip);
+	} else {
+		/*
+		 * Handle strange cache flush from user space exception
+		 * in all other cases.  This is undocumented behaviour.
+		 */
+		if (regs->eflags & VM_MASK) {
+			handle_vm86_fault((struct kernel_vm86_regs *)regs,
+					  error_code);
+			return;
+		}
+		die_if_kernel("cache flush denied", regs, error_code);
+		current->thread.trap_no = 19;
+		current->thread.error_code = error_code;
+		force_sig(SIGSEGV, current);
+	}
+}
+
+fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
+					  long error_code)
+{
+#if 0
+	/* No need to warn about this any longer. */
+	printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+#endif
+}
+
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+	unsigned long *switch16_ptr, *switch32_ptr;
+	struct pt_regs *regs;
+	unsigned long stack_top, stack_bot;
+	unsigned short iret_frame16_off;
+	int cpu = smp_processor_id();
+	/* reserve the space on 32bit stack for the magic switch16 pointer */
+	memmove(stk, stk + 8, sizeof(struct pt_regs));
+	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+	regs = (struct pt_regs *)stk;
+	/* now the switch32 on 16bit stack */
+	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
+	switch32_ptr = (unsigned long *)(stack_top - 8);
+	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+	/* copy iret frame on 16bit stack */
+	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+	/* fill in the switch pointers */
+	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+	switch16_ptr[1] = __ESPFIX_SS;
+	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+		8 - CPU_16BIT_STACK_SIZE;
+	switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+	unsigned long *switch32_ptr;
+	unsigned char *stack16, *stack32;
+	unsigned long stack_top, stack_bot;
+	int len;
+	int cpu = smp_processor_id();
+	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
+	switch32_ptr = (unsigned long *)(stack_top - 8);
+	/* copy the data from 16bit stack to 32bit stack */
+	len = CPU_16BIT_STACK_SIZE - 8 - sp;
+	stack16 = (unsigned char *)(stack_bot + sp);
+	stack32 = (unsigned char *)
+		(switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+	memcpy(stack32, stack16, len);
+	return stack32;
+}
+
+/*
+ *  'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled (in this case,
+ * local interrupts are disabled at the call-site in entry.S).
+ */
+asmlinkage void math_state_restore(struct pt_regs regs)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	clts();		/* Allow maths ops (or we recurse) */
+	if (!tsk_used_math(tsk))
+		init_fpu(tsk);
+	restore_fpu(tsk);
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+}
+
+#ifndef CONFIG_MATH_EMULATION
+
+asmlinkage void math_emulate(long arg)
+{
+	printk("math-emulation not enabled and no coprocessor found.\n");
+	printk("killing %s.\n",current->comm);
+	force_sig(SIGFPE,current);
+	schedule();
+}
+
+#endif /* CONFIG_MATH_EMULATION */
+
+#ifdef CONFIG_X86_F00F_BUG
+void __init trap_init_f00f_bug(void)
+{
+	__set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+
+	/*
+	 * Update the IDT descriptor and reload the IDT so that
+	 * it uses the read-only mapped virtual address.
+	 */
+	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+}
+#endif
+
+#define _set_gate(gate_addr,type,dpl,addr,seg) \
+do { \
+  int __d0, __d1; \
+  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+	"movw %4,%%dx\n\t" \
+	"movl %%eax,%0\n\t" \
+	"movl %%edx,%1" \
+	:"=m" (*((long *) (gate_addr))), \
+	 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+	 "3" ((char *) (addr)),"2" ((seg) << 16)); \
+} while (0)
+
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+void set_intr_gate(unsigned int n, void *addr)
+{
+	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
+}
+
+/*
+ * This routine sets up an interrupt gate at directory privilege level 3.
+ */
+static inline void set_system_intr_gate(unsigned int n, void *addr)
+{
+	_set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
+}
+
+static void __init set_trap_gate(unsigned int n, void *addr)
+{
+	_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
+}
+
+static void __init set_system_gate(unsigned int n, void *addr)
+{
+	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
+}
+
+static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
+{
+	_set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
+}
+
+
+void __init trap_init(void)
+{
+#ifdef CONFIG_EISA
+	void __iomem *p = ioremap(0x0FFFD9, 4);
+	if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
+		EISA_bus = 1;
+	}
+	iounmap(p);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	init_apic_mappings();
+#endif
+
+	set_trap_gate(0,&divide_error);
+	set_intr_gate(1,&debug);
+	set_intr_gate(2,&nmi);
+	set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
+	set_system_gate(4,&overflow);
+	set_system_gate(5,&bounds);
+	set_trap_gate(6,&invalid_op);
+	set_trap_gate(7,&device_not_available);
+	set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
+	set_trap_gate(9,&coprocessor_segment_overrun);
+	set_trap_gate(10,&invalid_TSS);
+	set_trap_gate(11,&segment_not_present);
+	set_trap_gate(12,&stack_segment);
+	set_trap_gate(13,&general_protection);
+	set_intr_gate(14,&page_fault);
+	set_trap_gate(15,&spurious_interrupt_bug);
+	set_trap_gate(16,&coprocessor_error);
+	set_trap_gate(17,&alignment_check);
+#ifdef CONFIG_X86_MCE
+	set_trap_gate(18,&machine_check);
+#endif
+	set_trap_gate(19,&simd_coprocessor_error);
+
+	set_system_gate(SYSCALL_VECTOR,&system_call);
+
+	/*
+	 * Should be a barrier for any external CPU state.
+	 */
+	cpu_init();
+
+	trap_init_hook();
+}
+
+static int __init kstack_setup(char *s)
+{
+	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+__setup("kstack=", kstack_setup);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
new file mode 100644
index 00000000000..2f3d52dacff
--- /dev/null
+++ b/arch/i386/kernel/vm86.c
@@ -0,0 +1,804 @@
+/*
+ *  linux/kernel/vm86.c
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *
+ *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
+ *                stack - Manfred Spraul <manfreds@colorfullife.com>
+ *
+ *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
+ *                them correctly. Now the emulation will be in a
+ *                consistent state after stackfaults - Kasper Dupont
+ *                <kasperd@daimi.au.dk>
+ *
+ *  22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
+ *                <kasperd@daimi.au.dk>
+ *
+ *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
+ *                caused by Kasper Dupont's changes - Stas Sergeev
+ *
+ *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
+ *                Kasper Dupont <kasperd@daimi.au.dk>
+ *
+ *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
+ *                Kasper Dupont <kasperd@daimi.au.dk>
+ *
+ *   9 apr 2002 - Changed stack access macros to jump to a label
+ *                instead of returning to userspace. This simplifies
+ *                do_int, and is needed by handle_vm6_fault. Kasper
+ *                Dupont <kasperd@daimi.au.dk>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/tlbflush.h>
+#include <asm/irq.h>
+
+/*
+ * Known problems:
+ *
+ * Interrupt handling is not guaranteed:
+ * - a real x86 will disable all interrupts for one instruction
+ *   after a "mov ss,xx" to make stack handling atomic even without
+ *   the 'lss' instruction. We can't guarantee this in v86 mode,
+ *   as the next instruction might result in a page fault or similar.
+ * - a real x86 will have interrupts disabled for one instruction
+ *   past the 'sti' that enables them. We don't bother with all the
+ *   details yet.
+ *
+ * Let's hope these problems do not actually matter for anything.
+ */
+
+
+#define KVM86	((struct kernel_vm86_struct *)regs)
+#define VMPI 	KVM86->vm86plus
+
+
+/*
+ * 8- and 16-bit register defines..
+ */
+#define AL(regs)	(((unsigned char *)&((regs)->eax))[0])
+#define AH(regs)	(((unsigned char *)&((regs)->eax))[1])
+#define IP(regs)	(*(unsigned short *)&((regs)->eip))
+#define SP(regs)	(*(unsigned short *)&((regs)->esp))
+
+/*
+ * virtual flags (16 and 32-bit versions)
+ */
+#define VFLAGS	(*(unsigned short *)&(current->thread.v86flags))
+#define VEFLAGS	(current->thread.v86flags)
+
+#define set_flags(X,new,mask) \
+((X) = ((X) & ~(mask)) | ((new) & (mask)))
+
+#define SAFE_MASK	(0xDD5)
+#define RETURN_MASK	(0xDFF)
+
+#define VM86_REGS_PART2 orig_eax
+#define VM86_REGS_SIZE1 \
+        ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
+#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
+
+struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
+struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
+{
+	struct tss_struct *tss;
+	struct pt_regs *ret;
+	unsigned long tmp;
+
+	/*
+	 * This gets called from entry.S with interrupts disabled, but
+	 * from process context. Enable interrupts here, before trying
+	 * to access user space.
+	 */
+	local_irq_enable();
+
+	if (!current->thread.vm86_info) {
+		printk("no vm86_info: BAD\n");
+		do_exit(SIGSEGV);
+	}
+	set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+	tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
+	tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
+		&regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
+	tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
+	if (tmp) {
+		printk("vm86: could not access userspace vm86_info\n");
+		do_exit(SIGSEGV);
+	}
+
+	tss = &per_cpu(init_tss, get_cpu());
+	current->thread.esp0 = current->thread.saved_esp0;
+	current->thread.sysenter_cs = __KERNEL_CS;
+	load_esp0(tss, &current->thread);
+	current->thread.saved_esp0 = 0;
+	put_cpu();
+
+	loadsegment(fs, current->thread.saved_fs);
+	loadsegment(gs, current->thread.saved_gs);
+	ret = KVM86->regs32;
+	return ret;
+}
+
+static void mark_screen_rdonly(struct task_struct * tsk)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, *mapped;
+	int i;
+
+	preempt_disable();
+	spin_lock(&tsk->mm->page_table_lock);
+	pgd = pgd_offset(tsk->mm, 0xA0000);
+	if (pgd_none_or_clear_bad(pgd))
+		goto out;
+	pud = pud_offset(pgd, 0xA0000);
+	if (pud_none_or_clear_bad(pud))
+		goto out;
+	pmd = pmd_offset(pud, 0xA0000);
+	if (pmd_none_or_clear_bad(pmd))
+		goto out;
+	pte = mapped = pte_offset_map(pmd, 0xA0000);
+	for (i = 0; i < 32; i++) {
+		if (pte_present(*pte))
+			set_pte(pte, pte_wrprotect(*pte));
+		pte++;
+	}
+	pte_unmap(mapped);
+out:
+	spin_unlock(&tsk->mm->page_table_lock);
+	preempt_enable();
+	flush_tlb();
+}
+
+
+
+static int do_vm86_irq_handling(int subfunction, int irqnumber);
+static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
+
+asmlinkage int sys_vm86old(struct pt_regs regs)
+{
+	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
+	struct kernel_vm86_struct info; /* declare this _on top_,
+					 * this avoids wasting of stack space.
+					 * This remains on the stack until we
+					 * return to 32 bit user space.
+					 */
+	struct task_struct *tsk;
+	int tmp, ret = -EPERM;
+
+	tsk = current;
+	if (tsk->thread.saved_esp0)
+		goto out;
+	tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
+	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
+		(long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
+	ret = -EFAULT;
+	if (tmp)
+		goto out;
+	memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
+	info.regs32 = &regs;
+	tsk->thread.vm86_info = v86;
+	do_sys_vm86(&info, tsk);
+	ret = 0;	/* we never return here */
+out:
+	return ret;
+}
+
+
+asmlinkage int sys_vm86(struct pt_regs regs)
+{
+	struct kernel_vm86_struct info; /* declare this _on top_,
+					 * this avoids wasting of stack space.
+					 * This remains on the stack until we
+					 * return to 32 bit user space.
+					 */
+	struct task_struct *tsk;
+	int tmp, ret;
+	struct vm86plus_struct __user *v86;
+
+	tsk = current;
+	switch (regs.ebx) {
+		case VM86_REQUEST_IRQ:
+		case VM86_FREE_IRQ:
+		case VM86_GET_IRQ_BITS:
+		case VM86_GET_AND_RESET_IRQ:
+			ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
+			goto out;
+		case VM86_PLUS_INSTALL_CHECK:
+			/* NOTE: on old vm86 stuff this will return the error
+			   from verify_area(), because the subfunction is
+			   interpreted as (invalid) address to vm86_struct.
+			   So the installation check works.
+			 */
+			ret = 0;
+			goto out;
+	}
+
+	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
+	ret = -EPERM;
+	if (tsk->thread.saved_esp0)
+		goto out;
+	v86 = (struct vm86plus_struct __user *)regs.ecx;
+	tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
+	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
+		(long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
+	ret = -EFAULT;
+	if (tmp)
+		goto out;
+	info.regs32 = &regs;
+	info.vm86plus.is_vm86pus = 1;
+	tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
+	do_sys_vm86(&info, tsk);
+	ret = 0;	/* we never return here */
+out:
+	return ret;
+}
+
+
+static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
+{
+	struct tss_struct *tss;
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+	info->regs.__null_ds = 0;
+	info->regs.__null_es = 0;
+
+/* we are clearing fs,gs later just before "jmp resume_userspace",
+ * because starting with Linux 2.1.x they aren't no longer saved/restored
+ */
+
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+ 	VEFLAGS = info->regs.eflags;
+	info->regs.eflags &= SAFE_MASK;
+	info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
+	info->regs.eflags |= VM_MASK;
+
+	switch (info->cpu_type) {
+		case CPU_286:
+			tsk->thread.v86mask = 0;
+			break;
+		case CPU_386:
+			tsk->thread.v86mask = NT_MASK | IOPL_MASK;
+			break;
+		case CPU_486:
+			tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
+			break;
+		default:
+			tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
+			break;
+	}
+
+/*
+ * Save old state, set default return value (%eax) to 0
+ */
+	info->regs32->eax = 0;
+	tsk->thread.saved_esp0 = tsk->thread.esp0;
+	asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs));
+	asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs));
+
+	tss = &per_cpu(init_tss, get_cpu());
+	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+	if (cpu_has_sep)
+		tsk->thread.sysenter_cs = 0;
+	load_esp0(tss, &tsk->thread);
+	put_cpu();
+
+	tsk->thread.screen_bitmap = info->screen_bitmap;
+	if (info->flags & VM86_SCREEN_BITMAP)
+		mark_screen_rdonly(tsk);
+	__asm__ __volatile__(
+		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
+		"movl %0,%%esp\n\t"
+		"movl %1,%%ebp\n\t"
+		"jmp resume_userspace"
+		: /* no outputs */
+		:"r" (&info->regs), "r" (tsk->thread_info) : "ax");
+	/* we never return here */
+}
+
+static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
+{
+	struct pt_regs * regs32;
+
+	regs32 = save_v86_state(regs16);
+	regs32->eax = retval;
+	__asm__ __volatile__("movl %0,%%esp\n\t"
+		"movl %1,%%ebp\n\t"
+		"jmp resume_userspace"
+		: : "r" (regs32), "r" (current_thread_info()));
+}
+
+static inline void set_IF(struct kernel_vm86_regs * regs)
+{
+	VEFLAGS |= VIF_MASK;
+	if (VEFLAGS & VIP_MASK)
+		return_to_32bit(regs, VM86_STI);
+}
+
+static inline void clear_IF(struct kernel_vm86_regs * regs)
+{
+	VEFLAGS &= ~VIF_MASK;
+}
+
+static inline void clear_TF(struct kernel_vm86_regs * regs)
+{
+	regs->eflags &= ~TF_MASK;
+}
+
+static inline void clear_AC(struct kernel_vm86_regs * regs)
+{
+	regs->eflags &= ~AC_MASK;
+}
+
+/* It is correct to call set_IF(regs) from the set_vflags_*
+ * functions. However someone forgot to call clear_IF(regs)
+ * in the opposite case.
+ * After the command sequence CLI PUSHF STI POPF you should
+ * end up with interrups disabled, but you ended up with
+ * interrupts enabled.
+ *  ( I was testing my own changes, but the only bug I
+ *    could find was in a function I had not changed. )
+ * [KD]
+ */
+
+static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
+{
+	set_flags(VEFLAGS, eflags, current->thread.v86mask);
+	set_flags(regs->eflags, eflags, SAFE_MASK);
+	if (eflags & IF_MASK)
+		set_IF(regs);
+	else
+		clear_IF(regs);
+}
+
+static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
+{
+	set_flags(VFLAGS, flags, current->thread.v86mask);
+	set_flags(regs->eflags, flags, SAFE_MASK);
+	if (flags & IF_MASK)
+		set_IF(regs);
+	else
+		clear_IF(regs);
+}
+
+static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
+{
+	unsigned long flags = regs->eflags & RETURN_MASK;
+
+	if (VEFLAGS & VIF_MASK)
+		flags |= IF_MASK;
+	flags |= IOPL_MASK;
+	return flags | (VEFLAGS & current->thread.v86mask);
+}
+
+static inline int is_revectored(int nr, struct revectored_struct * bitmap)
+{
+	__asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
+		:"=r" (nr)
+		:"m" (*bitmap),"r" (nr));
+	return nr;
+}
+
+#define val_byte(val, n) (((__u8 *)&val)[n])
+
+#define pushb(base, ptr, val, err_label) \
+	do { \
+		__u8 __val = val; \
+		ptr--; \
+		if (put_user(__val, base + ptr) < 0) \
+			goto err_label; \
+	} while(0)
+
+#define pushw(base, ptr, val, err_label) \
+	do { \
+		__u16 __val = val; \
+		ptr--; \
+		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
+			goto err_label; \
+		ptr--; \
+		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
+			goto err_label; \
+	} while(0)
+
+#define pushl(base, ptr, val, err_label) \
+	do { \
+		__u32 __val = val; \
+		ptr--; \
+		if (put_user(val_byte(__val, 3), base + ptr) < 0) \
+			goto err_label; \
+		ptr--; \
+		if (put_user(val_byte(__val, 2), base + ptr) < 0) \
+			goto err_label; \
+		ptr--; \
+		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
+			goto err_label; \
+		ptr--; \
+		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
+			goto err_label; \
+	} while(0)
+
+#define popb(base, ptr, err_label) \
+	({ \
+		__u8 __res; \
+		if (get_user(__res, base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		__res; \
+	})
+
+#define popw(base, ptr, err_label) \
+	({ \
+		__u16 __res; \
+		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		__res; \
+	})
+
+#define popl(base, ptr, err_label) \
+	({ \
+		__u32 __res; \
+		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		if (get_user(val_byte(__res, 2), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		if (get_user(val_byte(__res, 3), base + ptr) < 0) \
+			goto err_label; \
+		ptr++; \
+		__res; \
+	})
+
+/* There are so many possible reasons for this function to return
+ * VM86_INTx, so adding another doesn't bother me. We can expect
+ * userspace programs to be able to handle it. (Getting a problem
+ * in userspace is always better than an Oops anyway.) [KD]
+ */
+static void do_int(struct kernel_vm86_regs *regs, int i,
+    unsigned char __user * ssp, unsigned short sp)
+{
+	unsigned long __user *intr_ptr;
+	unsigned long segoffs;
+
+	if (regs->cs == BIOSSEG)
+		goto cannot_handle;
+	if (is_revectored(i, &KVM86->int_revectored))
+		goto cannot_handle;
+	if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored))
+		goto cannot_handle;
+	intr_ptr = (unsigned long __user *) (i << 2);
+	if (get_user(segoffs, intr_ptr))
+		goto cannot_handle;
+	if ((segoffs >> 16) == BIOSSEG)
+		goto cannot_handle;
+	pushw(ssp, sp, get_vflags(regs), cannot_handle);
+	pushw(ssp, sp, regs->cs, cannot_handle);
+	pushw(ssp, sp, IP(regs), cannot_handle);
+	regs->cs = segoffs >> 16;
+	SP(regs) -= 6;
+	IP(regs) = segoffs & 0xffff;
+	clear_TF(regs);
+	clear_IF(regs);
+	clear_AC(regs);
+	return;
+
+cannot_handle:
+	return_to_32bit(regs, VM86_INTx + (i << 8));
+}
+
+int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
+{
+	if (VMPI.is_vm86pus) {
+		if ( (trapno==3) || (trapno==1) )
+			return_to_32bit(regs, VM86_TRAP + (trapno << 8));
+		do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
+		return 0;
+	}
+	if (trapno !=1)
+		return 1; /* we let this handle by the calling routine */
+	if (current->ptrace & PT_PTRACED) {
+		unsigned long flags;
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		sigdelset(&current->blocked, SIGTRAP);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+	send_sig(SIGTRAP, current, 1);
+	current->thread.trap_no = trapno;
+	current->thread.error_code = error_code;
+	return 0;
+}
+
+void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
+{
+	unsigned char opcode;
+	unsigned char __user *csp;
+	unsigned char __user *ssp;
+	unsigned short ip, sp;
+	int data32, pref_done;
+
+#define CHECK_IF_IN_TRAP \
+	if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
+		newflags |= TF_MASK
+#define VM86_FAULT_RETURN do { \
+	if (VMPI.force_return_for_pic  && (VEFLAGS & (IF_MASK | VIF_MASK))) \
+		return_to_32bit(regs, VM86_PICRETURN); \
+	return; } while (0)
+
+	csp = (unsigned char __user *) (regs->cs << 4);
+	ssp = (unsigned char __user *) (regs->ss << 4);
+	sp = SP(regs);
+	ip = IP(regs);
+
+	data32 = 0;
+	pref_done = 0;
+	do {
+		switch (opcode = popb(csp, ip, simulate_sigsegv)) {
+			case 0x66:      /* 32-bit data */     data32=1; break;
+			case 0x67:      /* 32-bit address */  break;
+			case 0x2e:      /* CS */              break;
+			case 0x3e:      /* DS */              break;
+			case 0x26:      /* ES */              break;
+			case 0x36:      /* SS */              break;
+			case 0x65:      /* GS */              break;
+			case 0x64:      /* FS */              break;
+			case 0xf2:      /* repnz */       break;
+			case 0xf3:      /* rep */             break;
+			default: pref_done = 1;
+		}
+	} while (!pref_done);
+
+	switch (opcode) {
+
+	/* pushf */
+	case 0x9c:
+		if (data32) {
+			pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
+			SP(regs) -= 4;
+		} else {
+			pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
+			SP(regs) -= 2;
+		}
+		IP(regs) = ip;
+		VM86_FAULT_RETURN;
+
+	/* popf */
+	case 0x9d:
+		{
+		unsigned long newflags;
+		if (data32) {
+			newflags=popl(ssp, sp, simulate_sigsegv);
+			SP(regs) += 4;
+		} else {
+			newflags = popw(ssp, sp, simulate_sigsegv);
+			SP(regs) += 2;
+		}
+		IP(regs) = ip;
+		CHECK_IF_IN_TRAP;
+		if (data32) {
+			set_vflags_long(newflags, regs);
+		} else {
+			set_vflags_short(newflags, regs);
+		}
+		VM86_FAULT_RETURN;
+		}
+
+	/* int xx */
+	case 0xcd: {
+		int intno=popb(csp, ip, simulate_sigsegv);
+		IP(regs) = ip;
+		if (VMPI.vm86dbg_active) {
+			if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
+				return_to_32bit(regs, VM86_INTx + (intno << 8));
+		}
+		do_int(regs, intno, ssp, sp);
+		return;
+	}
+
+	/* iret */
+	case 0xcf:
+		{
+		unsigned long newip;
+		unsigned long newcs;
+		unsigned long newflags;
+		if (data32) {
+			newip=popl(ssp, sp, simulate_sigsegv);
+			newcs=popl(ssp, sp, simulate_sigsegv);
+			newflags=popl(ssp, sp, simulate_sigsegv);
+			SP(regs) += 12;
+		} else {
+			newip = popw(ssp, sp, simulate_sigsegv);
+			newcs = popw(ssp, sp, simulate_sigsegv);
+			newflags = popw(ssp, sp, simulate_sigsegv);
+			SP(regs) += 6;
+		}
+		IP(regs) = newip;
+		regs->cs = newcs;
+		CHECK_IF_IN_TRAP;
+		if (data32) {
+			set_vflags_long(newflags, regs);
+		} else {
+			set_vflags_short(newflags, regs);
+		}
+		VM86_FAULT_RETURN;
+		}
+
+	/* cli */
+	case 0xfa:
+		IP(regs) = ip;
+		clear_IF(regs);
+		VM86_FAULT_RETURN;
+
+	/* sti */
+	/*
+	 * Damn. This is incorrect: the 'sti' instruction should actually
+	 * enable interrupts after the /next/ instruction. Not good.
+	 *
+	 * Probably needs some horsing around with the TF flag. Aiee..
+	 */
+	case 0xfb:
+		IP(regs) = ip;
+		set_IF(regs);
+		VM86_FAULT_RETURN;
+
+	default:
+		return_to_32bit(regs, VM86_UNKNOWN);
+	}
+
+	return;
+
+simulate_sigsegv:
+	/* FIXME: After a long discussion with Stas we finally
+	 *        agreed, that this is wrong. Here we should
+	 *        really send a SIGSEGV to the user program.
+	 *        But how do we create the correct context? We
+	 *        are inside a general protection fault handler
+	 *        and has just returned from a page fault handler.
+	 *        The correct context for the signal handler
+	 *        should be a mixture of the two, but how do we
+	 *        get the information? [KD]
+	 */
+	return_to_32bit(regs, VM86_UNKNOWN);
+}
+
+/* ---------------- vm86 special IRQ passing stuff ----------------- */
+
+#define VM86_IRQNAME		"vm86irq"
+
+static struct vm86_irqs {
+	struct task_struct *tsk;
+	int sig;
+} vm86_irqs[16];
+
+static DEFINE_SPINLOCK(irqbits_lock);
+static int irqbits;
+
+#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
+	| (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
+	| (1 << SIGUNUSED) )
+	
+static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
+{
+	int irq_bit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irqbits_lock, flags);	
+	irq_bit = 1 << intno;
+	if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk)
+		goto out;
+	irqbits |= irq_bit;
+	if (vm86_irqs[intno].sig)
+		send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
+	spin_unlock_irqrestore(&irqbits_lock, flags);
+	/*
+	 * IRQ will be re-enabled when user asks for the irq (whether
+	 * polling or as a result of the signal)
+	 */
+	disable_irq(intno);
+	return IRQ_HANDLED;
+
+out:
+	spin_unlock_irqrestore(&irqbits_lock, flags);	
+	return IRQ_NONE;
+}
+
+static inline void free_vm86_irq(int irqnumber)
+{
+	unsigned long flags;
+
+	free_irq(irqnumber, NULL);
+	vm86_irqs[irqnumber].tsk = NULL;
+
+	spin_lock_irqsave(&irqbits_lock, flags);	
+	irqbits &= ~(1 << irqnumber);
+	spin_unlock_irqrestore(&irqbits_lock, flags);	
+}
+
+void release_vm86_irqs(struct task_struct *task)
+{
+	int i;
+	for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
+	    if (vm86_irqs[i].tsk == task)
+		free_vm86_irq(i);
+}
+
+static inline int get_and_reset_irq(int irqnumber)
+{
+	int bit;
+	unsigned long flags;
+	
+	if (invalid_vm86_irq(irqnumber)) return 0;
+	if (vm86_irqs[irqnumber].tsk != current) return 0;
+	spin_lock_irqsave(&irqbits_lock, flags);	
+	bit = irqbits & (1 << irqnumber);
+	irqbits &= ~bit;
+	spin_unlock_irqrestore(&irqbits_lock, flags);	
+	if (!bit)
+		return 0;
+	enable_irq(irqnumber);
+	return 1;
+}
+
+
+static int do_vm86_irq_handling(int subfunction, int irqnumber)
+{
+	int ret;
+	switch (subfunction) {
+		case VM86_GET_AND_RESET_IRQ: {
+			return get_and_reset_irq(irqnumber);
+		}
+		case VM86_GET_IRQ_BITS: {
+			return irqbits;
+		}
+		case VM86_REQUEST_IRQ: {
+			int sig = irqnumber >> 8;
+			int irq = irqnumber & 255;
+			if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+			if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
+			if (invalid_vm86_irq(irq)) return -EPERM;
+			if (vm86_irqs[irq].tsk) return -EPERM;
+			ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
+			if (ret) return ret;
+			vm86_irqs[irq].sig = sig;
+			vm86_irqs[irq].tsk = current;
+			return irq;
+		}
+		case  VM86_FREE_IRQ: {
+			if (invalid_vm86_irq(irqnumber)) return -EPERM;
+			if (!vm86_irqs[irqnumber].tsk) return 0;
+			if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
+			free_vm86_irq(irqnumber);
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..e0512cc8bea
--- /dev/null
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -0,0 +1,134 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+jiffies = jiffies_64;
+SECTIONS
+{
+  . = __PAGE_OFFSET + 0x100000;
+  /* read-only */
+  _text = .;			/* Text and read-only data */
+  .text : {
+	*(.text)
+	SCHED_TEXT
+	LOCK_TEXT
+	*(.fixup)
+	*(.gnu.warning)
+	} = 0x9090
+
+  _etext = .;			/* End of text section */
+
+  . = ALIGN(16);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  RODATA
+
+  /* writeable */
+  .data : {			/* Data */
+	*(.data)
+	CONSTRUCTORS
+	}
+
+  . = ALIGN(4096);
+  __nosave_begin = .;
+  .data_nosave : { *(.data.nosave) }
+  . = ALIGN(4096);
+  __nosave_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  _edata = .;			/* End of data section */
+
+  . = ALIGN(THREAD_SIZE);	/* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  /* will be freed after init */
+  . = ALIGN(4096);		/* Init code and data */
+  __init_begin = .;
+  .init.text : { 
+	_sinittext = .;
+	*(.init.text)
+	_einittext = .;
+  }
+  .init.data : { *(.init.data) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .init.setup : { *(.init.setup) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : {
+	*(.initcall1.init) 
+	*(.initcall2.init) 
+	*(.initcall3.init) 
+	*(.initcall4.init) 
+	*(.initcall5.init) 
+	*(.initcall6.init) 
+	*(.initcall7.init)
+  }
+  __initcall_end = .;
+  __con_initcall_start = .;
+  .con_initcall.init : { *(.con_initcall.init) }
+  __con_initcall_end = .;
+  SECURITY_INIT
+  . = ALIGN(4);
+  __alt_instructions = .;
+  .altinstructions : { *(.altinstructions) } 
+  __alt_instructions_end = .; 
+ .altinstr_replacement : { *(.altinstr_replacement) } 
+  /* .exit.text is discard at runtime, not link time, to deal with references
+     from .altinstructions and .eh_frame */
+  .exit.text : { *(.exit.text) }
+  .exit.data : { *(.exit.data) }
+  . = ALIGN(4096);
+  __initramfs_start = .;
+  .init.ramfs : { *(.init.ramfs) }
+  __initramfs_end = .;
+  . = ALIGN(32);
+  __per_cpu_start = .;
+  .data.percpu  : { *(.data.percpu) }
+  __per_cpu_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+  /* freed after init ends here */
+	
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss.page_aligned)
+	*(.bss)
+  }
+  . = ALIGN(4);
+  __bss_stop = .; 
+
+  _end = . ;
+
+  /* This is where the kernel creates the early boot page tables */
+  . = ALIGN(4096);
+  pg0 = .;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+	*(.exitcall.exit)
+	}
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/arch/i386/kernel/vsyscall-int80.S b/arch/i386/kernel/vsyscall-int80.S
new file mode 100644
index 00000000000..530d0525e5e
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-int80.S
@@ -0,0 +1,53 @@
+/*
+ * Code for the vsyscall page.  This version uses the old int $0x80 method.
+ *
+ * NOTE:
+ * 1) __kernel_vsyscall _must_ be first in this page.
+ * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
+ *    for details.
+ */
+
+	.text
+	.globl __kernel_vsyscall
+	.type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+	int $0x80
+	ret
+.LEND_vsyscall:
+	.size __kernel_vsyscall,.-.LSTART_vsyscall
+	.previous
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+	.long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zR"		/* NUL-terminated augmentation string */
+	.uleb128 1		/* Code alignment factor */
+	.sleb128 -4		/* Data alignment factor */
+	.byte 8			/* Return address register column */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+	.byte 0x0c		/* DW_CFA_def_cfa */
+	.uleb128 4
+	.uleb128 4
+	.byte 0x88		/* DW_CFA_offset, column 0x8 */
+	.uleb128 1
+	.align 4
+.LENDCIEDLSI:
+	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+	.long .LSTART_vsyscall-.	/* PC-relative start address */
+	.long .LEND_vsyscall-.LSTART_vsyscall
+	.uleb128 0
+	.align 4
+.LENDFDEDLSI:
+	.previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
new file mode 100644
index 00000000000..c8fcf75b9be
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -0,0 +1,142 @@
+/*
+ * Common code for the sigreturn entry points on the vsyscall page.
+ * So far this code is the same for both int80 and sysenter versions.
+ * This file is #include'd by vsyscall-*.S to define them after the
+ * vsyscall entry point.  The kernel assumes that the addresses of these
+ * routines are constant for all vsyscall implementations.
+ */
+
+#include <asm/unistd.h>
+#include <asm/asm_offsets.h>
+
+
+/* XXX
+   Should these be named "_sigtramp" or something?
+*/
+
+	.text
+	.org	__kernel_vsyscall+32
+	.globl __kernel_sigreturn
+	.type __kernel_sigreturn,@function
+__kernel_sigreturn:
+.LSTART_sigreturn:
+	popl %eax		/* XXX does this mean it needs unwind info? */
+	movl $__NR_sigreturn, %eax
+	int $0x80
+.LEND_sigreturn:
+	.size __kernel_sigreturn,.-.LSTART_sigreturn
+
+	.balign 32
+	.globl __kernel_rt_sigreturn
+	.type __kernel_rt_sigreturn,@function
+__kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:
+	movl $__NR_rt_sigreturn, %eax
+	int $0x80
+.LEND_rt_sigreturn:
+	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+	.previous
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI1:
+	.long .LENDCIEDLSI1-.LSTARTCIEDLSI1
+.LSTARTCIEDLSI1:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zR"		/* NUL-terminated augmentation string */
+	.uleb128 1		/* Code alignment factor */
+	.sleb128 -4		/* Data alignment factor */
+	.byte 8			/* Return address register column */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+	.byte 0			/* DW_CFA_nop */
+	.align 4
+.LENDCIEDLSI1:
+	.long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
+.LSTARTFDEDLSI1:
+	.long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
+	/* HACK: The dwarf2 unwind routines will subtract 1 from the
+	   return address to get an address in the middle of the
+	   presumed call instruction.  Since we didn't get here via
+	   a call, we need to include the nop before the real start
+	   to make up for it.  */
+	.long .LSTART_sigreturn-1-.	/* PC-relative start address */
+	.long .LEND_sigreturn-.LSTART_sigreturn+1
+	.uleb128 0			/* Augmentation */
+	/* What follows are the instructions for the table generation.
+	   We record the locations of each register saved.  This is
+	   complicated by the fact that the "CFA" is always assumed to
+	   be the value of the stack pointer in the caller.  This means
+	   that we must define the CFA of this body of code to be the
+	   saved value of the stack pointer in the sigcontext.  Which
+	   also means that there is no fixed relation to the other 
+	   saved registers, which means that we must use DW_CFA_expression
+	   to compute their addresses.  It also means that when we 
+	   adjust the stack with the popl, we have to do it all over again.  */
+
+#define do_cfa_expr(offset)						\
+	.byte 0x0f;			/* DW_CFA_def_cfa_expression */	\
+	.uleb128 1f-0f;			/*   length */			\
+0:	.byte 0x74;			/*     DW_OP_breg4 */		\
+	.sleb128 offset;		/*      offset */		\
+	.byte 0x06;			/*     DW_OP_deref */		\
+1:
+
+#define do_expr(regno, offset)						\
+	.byte 0x10;			/* DW_CFA_expression */		\
+	.uleb128 regno;			/*   regno */			\
+	.uleb128 1f-0f;			/*   length */			\
+0:	.byte 0x74;			/*     DW_OP_breg4 */		\
+	.sleb128 offset;		/*       offset */		\
+1:
+
+	do_cfa_expr(SIGCONTEXT_esp+4)
+	do_expr(0, SIGCONTEXT_eax+4)
+	do_expr(1, SIGCONTEXT_ecx+4)
+	do_expr(2, SIGCONTEXT_edx+4)
+	do_expr(3, SIGCONTEXT_ebx+4)
+	do_expr(5, SIGCONTEXT_ebp+4)
+	do_expr(6, SIGCONTEXT_esi+4)
+	do_expr(7, SIGCONTEXT_edi+4)
+	do_expr(8, SIGCONTEXT_eip+4)
+
+	.byte 0x42	/* DW_CFA_advance_loc 2 -- nop; popl eax. */
+
+	do_cfa_expr(SIGCONTEXT_esp)
+	do_expr(0, SIGCONTEXT_eax)
+	do_expr(1, SIGCONTEXT_ecx)
+	do_expr(2, SIGCONTEXT_edx)
+	do_expr(3, SIGCONTEXT_ebx)
+	do_expr(5, SIGCONTEXT_ebp)
+	do_expr(6, SIGCONTEXT_esi)
+	do_expr(7, SIGCONTEXT_edi)
+	do_expr(8, SIGCONTEXT_eip)
+
+	.align 4
+.LENDFDEDLSI1:
+
+	.long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
+.LSTARTFDEDLSI2:
+	.long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
+	/* HACK: See above wrt unwind library assumptions.  */
+	.long .LSTART_rt_sigreturn-1-.	/* PC-relative start address */
+	.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
+	.uleb128 0			/* Augmentation */
+	/* What follows are the instructions for the table generation.
+	   We record the locations of each register saved.  This is
+	   slightly less complicated than the above, since we don't
+	   modify the stack pointer in the process.  */
+
+	do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
+	do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
+	do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
+	do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
+	do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
+	do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
+	do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
+	do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
+	do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
+
+	.align 4
+.LENDFDEDLSI2:
+	.previous
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
new file mode 100644
index 00000000000..4daefb2ec1b
--- /dev/null
+++ b/arch/i386/kernel/vsyscall-sysenter.S
@@ -0,0 +1,104 @@
+/*
+ * Code for the vsyscall page.  This version uses the sysenter instruction.
+ *
+ * NOTE:
+ * 1) __kernel_vsyscall _must_ be first in this page.
+ * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
+ *    for details.
+ */
+
+	.text
+	.globl __kernel_vsyscall
+	.type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+	push %ecx
+.Lpush_ecx:
+	push %edx
+.Lpush_edx:
+	push %ebp
+.Lenter_kernel:
+	movl %esp,%ebp
+	sysenter
+
+	/* 7: align return point with nop's to make disassembly easier */
+	.space 7,0x90
+
+	/* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+	jmp .Lenter_kernel
+	/* 16: System call normal return point is here! */
+	.globl SYSENTER_RETURN	/* Symbol used by entry.S.  */
+SYSENTER_RETURN:
+	pop %ebp
+.Lpop_ebp:
+	pop %edx
+.Lpop_edx:
+	pop %ecx
+.Lpop_ecx:
+	ret
+.LEND_vsyscall:
+	.size __kernel_vsyscall,.-.LSTART_vsyscall
+	.previous
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+	.long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zR"		/* NUL-terminated augmentation string */
+	.uleb128 1		/* Code alignment factor */
+	.sleb128 -4		/* Data alignment factor */
+	.byte 8			/* Return address register column */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+	.byte 0x0c		/* DW_CFA_def_cfa */
+	.uleb128 4
+	.uleb128 4
+	.byte 0x88		/* DW_CFA_offset, column 0x8 */
+	.uleb128 1
+	.align 4
+.LENDCIEDLSI:
+	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+	.long .LSTART_vsyscall-.	/* PC-relative start address */
+	.long .LEND_vsyscall-.LSTART_vsyscall
+	.uleb128 0
+	/* What follows are the instructions for the table generation.
+	   We have to record all changes of the stack pointer.  */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lpush_ecx-.LSTART_vsyscall
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x08		/* RA at offset 8 now */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lpush_edx-.Lpush_ecx
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x0c		/* RA at offset 12 now */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lenter_kernel-.Lpush_edx
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x10		/* RA at offset 16 now */
+	.byte 0x85, 0x04	/* DW_CFA_offset %ebp -16 */
+	/* Finally the epilogue.  */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lpop_ebp-.Lenter_kernel
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x0c		/* RA at offset 12 now */
+	.byte 0xc5		/* DW_CFA_restore %ebp */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lpop_edx-.Lpop_ebp
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x08		/* RA at offset 8 now */
+	.byte 0x04		/* DW_CFA_advance_loc4 */
+	.long .Lpop_ecx-.Lpop_edx
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.byte 0x04		/* RA at offset 4 now */
+	.align 4
+.LENDFDEDLSI:
+	.previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall.S b/arch/i386/kernel/vsyscall.S
new file mode 100644
index 00000000000..b403890fe39
--- /dev/null
+++ b/arch/i386/kernel/vsyscall.S
@@ -0,0 +1,15 @@
+#include <linux/init.h>
+
+__INITDATA
+
+	.globl vsyscall_int80_start, vsyscall_int80_end
+vsyscall_int80_start:
+	.incbin "arch/i386/kernel/vsyscall-int80.so"
+vsyscall_int80_end:
+
+	.globl vsyscall_sysenter_start, vsyscall_sysenter_end
+vsyscall_sysenter_start:
+	.incbin "arch/i386/kernel/vsyscall-sysenter.so"
+vsyscall_sysenter_end:
+
+__FINIT
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
new file mode 100644
index 00000000000..3a8329d6536
--- /dev/null
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -0,0 +1,65 @@
+/*
+ * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page).  This script controls its layout.
+ */
+#include <asm/asm_offsets.h>
+
+SECTIONS
+{
+  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+
+  .hash           : { *(.hash) }		:text
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  /* This linker script is used both with -r and with -shared.
+     For the layouts to match, we need to skip more than enough
+     space for the dynamic symbol table et al.  If this amount
+     is insufficient, ld -shared will barf.  Just increase it here.  */
+  . = VSYSCALL_BASE + 0x400;
+
+  .text           : { *(.text) }		:text =0x90909090
+
+  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text :eh_frame_hdr
+  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
+  .dynamic        : { *(.dynamic) }		:text :dynamic
+  .useless        : {
+  	*(.got.plt) *(.got)
+	*(.data .data.* .gnu.linkonce.d.*)
+	*(.dynbss)
+	*(.bss .bss.* .gnu.linkonce.b.*)
+  }						:text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  LINUX_2.5 {
+    global:
+    	__kernel_vsyscall;
+    	__kernel_sigreturn;
+    	__kernel_rt_sigreturn;
+
+    local: *;
+  };
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_vsyscall);
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
new file mode 100644
index 00000000000..7b1932d20f9
--- /dev/null
+++ b/arch/i386/lib/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
+	bitops.o
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
new file mode 100644
index 00000000000..97db3853dc8
--- /dev/null
+++ b/arch/i386/lib/bitops.c
@@ -0,0 +1,70 @@
+#include <linux/bitops.h>
+#include <linux/module.h>
+
+/**
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+int find_next_bit(const unsigned long *addr, int size, int offset)
+{
+	const unsigned long *p = addr + (offset >> 5);
+	int set = 0, bit = offset & 31, res;
+
+	if (bit) {
+		/*
+		 * Look for nonzero in the first 32 bits:
+		 */
+		__asm__("bsfl %1,%0\n\t"
+			"jne 1f\n\t"
+			"movl $32, %0\n"
+			"1:"
+			: "=r" (set)
+			: "r" (*p >> bit));
+		if (set < (32 - bit))
+			return set + offset;
+		set = 32 - bit;
+		p++;
+	}
+	/*
+	 * No set bit yet, search remaining full words for a bit
+	 */
+	res = find_first_bit (p, size - 32 * (p - addr));
+	return (offset + set + res);
+}
+EXPORT_SYMBOL(find_next_bit);
+
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+int find_next_zero_bit(const unsigned long *addr, int size, int offset)
+{
+	unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+	int set = 0, bit = offset & 31, res;
+
+	if (bit) {
+		/*
+		 * Look for zero in the first 32 bits.
+		 */
+		__asm__("bsfl %1,%0\n\t"
+			"jne 1f\n\t"
+			"movl $32, %0\n"
+			"1:"
+			: "=r" (set)
+			: "r" (~(*p >> bit)));
+		if (set < (32 - bit))
+			return set + offset;
+		set = 32 - bit;
+		p++;
+	}
+	/*
+	 * No zero yet, search remaining full bytes for a zero
+	 */
+	res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+	return (offset + set + res);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 00000000000..94c7867ddc3
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,496 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Tom May, <ftom@netcom.com>
+ *              Pentium Pro/II routines:
+ *              Alexander Kjeldaas <astor@guardian.no>
+ *              Finn Arne Gangstad <finnag@guardian.no>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ *
+ * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ *			     handling.
+ *		Andi Kleen,  add zeroing on error
+ *                   converted to pure assembler
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/errno.h>
+				
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*	
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+		
+.text
+.align 4
+.globl csum_partial								
+		
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+	  /*		
+	   * Experiments with Ethernet and SLIP connections show that buff
+	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
+	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+	   * alignment for the unrolled loop.
+	   */		
+csum_partial:	
+	pushl %esi
+	pushl %ebx
+	movl 20(%esp),%eax	# Function arg: unsigned int sum
+	movl 16(%esp),%ecx	# Function arg: int len
+	movl 12(%esp),%esi	# Function arg: unsigned char *buff
+	testl $3, %esi		# Check alignment.
+	jz 2f			# Jump if alignment is ok.
+	testl $1, %esi		# Check alignment.
+	jz 10f			# Jump if alignment is boundary of 2bytes.
+
+	# buf is odd
+	dec %ecx
+	jl 8f
+	movzbl (%esi), %ebx
+	adcl %ebx, %eax
+	roll $8, %eax
+	inc %esi
+	testl $2, %esi
+	jz 2f
+10:
+	subl $2, %ecx		# Alignment uses up two bytes.
+	jae 1f			# Jump if we had at least two bytes.
+	addl $2, %ecx		# ecx was < 2.  Deal with it.
+	jmp 4f
+1:	movw (%esi), %bx
+	addl $2, %esi
+	addw %bx, %ax
+	adcl $0, %eax
+2:
+	movl %ecx, %edx
+	shrl $5, %ecx
+	jz 2f
+	testl %esi, %esi
+1:	movl (%esi), %ebx
+	adcl %ebx, %eax
+	movl 4(%esi), %ebx
+	adcl %ebx, %eax
+	movl 8(%esi), %ebx
+	adcl %ebx, %eax
+	movl 12(%esi), %ebx
+	adcl %ebx, %eax
+	movl 16(%esi), %ebx
+	adcl %ebx, %eax
+	movl 20(%esi), %ebx
+	adcl %ebx, %eax
+	movl 24(%esi), %ebx
+	adcl %ebx, %eax
+	movl 28(%esi), %ebx
+	adcl %ebx, %eax
+	lea 32(%esi), %esi
+	dec %ecx
+	jne 1b
+	adcl $0, %eax
+2:	movl %edx, %ecx
+	andl $0x1c, %edx
+	je 4f
+	shrl $2, %edx		# This clears CF
+3:	adcl (%esi), %eax
+	lea 4(%esi), %esi
+	dec %edx
+	jne 3b
+	adcl $0, %eax
+4:	andl $3, %ecx
+	jz 7f
+	cmpl $2, %ecx
+	jb 5f
+	movw (%esi),%cx
+	leal 2(%esi),%esi
+	je 6f
+	shll $16,%ecx
+5:	movb (%esi),%cl
+6:	addl %ecx,%eax
+	adcl $0, %eax 
+7:	
+	testl $1, 12(%esp)
+	jz 8f
+	roll $8, %eax
+8:
+	popl %ebx
+	popl %esi
+	ret
+
+#else
+
+/* Version for PentiumII/PPro */
+
+csum_partial:
+	pushl %esi
+	pushl %ebx
+	movl 20(%esp),%eax	# Function arg: unsigned int sum
+	movl 16(%esp),%ecx	# Function arg: int len
+	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
+
+	testl $3, %esi         
+	jnz 25f                 
+10:
+	movl %ecx, %edx
+	movl %ecx, %ebx
+	andl $0x7c, %ebx
+	shrl $7, %ecx
+	addl %ebx,%esi
+	shrl $2, %ebx  
+	negl %ebx
+	lea 45f(%ebx,%ebx,2), %ebx
+	testl %esi, %esi
+	jmp *%ebx
+
+	# Handle 2-byte-aligned regions
+20:	addw (%esi), %ax
+	lea 2(%esi), %esi
+	adcl $0, %eax
+	jmp 10b
+25:
+	testl $1, %esi         
+	jz 30f                 
+	# buf is odd
+	dec %ecx
+	jl 90f
+	movzbl (%esi), %ebx
+	addl %ebx, %eax
+	adcl $0, %eax
+	roll $8, %eax
+	inc %esi
+	testl $2, %esi
+	jz 10b
+
+30:	subl $2, %ecx          
+	ja 20b                 
+	je 32f
+	addl $2, %ecx
+	jz 80f
+	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
+	addl %ebx, %eax
+	adcl $0, %eax
+	jmp 80f
+32:
+	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
+	adcl $0, %eax
+	jmp 80f
+
+40: 
+	addl -128(%esi), %eax
+	adcl -124(%esi), %eax
+	adcl -120(%esi), %eax
+	adcl -116(%esi), %eax   
+	adcl -112(%esi), %eax   
+	adcl -108(%esi), %eax
+	adcl -104(%esi), %eax
+	adcl -100(%esi), %eax
+	adcl -96(%esi), %eax
+	adcl -92(%esi), %eax
+	adcl -88(%esi), %eax
+	adcl -84(%esi), %eax
+	adcl -80(%esi), %eax
+	adcl -76(%esi), %eax
+	adcl -72(%esi), %eax
+	adcl -68(%esi), %eax
+	adcl -64(%esi), %eax     
+	adcl -60(%esi), %eax     
+	adcl -56(%esi), %eax     
+	adcl -52(%esi), %eax   
+	adcl -48(%esi), %eax   
+	adcl -44(%esi), %eax
+	adcl -40(%esi), %eax
+	adcl -36(%esi), %eax
+	adcl -32(%esi), %eax
+	adcl -28(%esi), %eax
+	adcl -24(%esi), %eax
+	adcl -20(%esi), %eax
+	adcl -16(%esi), %eax
+	adcl -12(%esi), %eax
+	adcl -8(%esi), %eax
+	adcl -4(%esi), %eax
+45:
+	lea 128(%esi), %esi
+	adcl $0, %eax
+	dec %ecx
+	jge 40b
+	movl %edx, %ecx
+50:	andl $3, %ecx
+	jz 80f
+
+	# Handle the last 1-3 bytes without jumping
+	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
+	movl $0xffffff,%ebx	# by the shll and shrl instructions
+	shll $3,%ecx
+	shrl %cl,%ebx
+	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
+	addl %ebx,%eax
+	adcl $0,%eax
+80: 
+	testl $1, 12(%esp)
+	jz 90f
+	roll $8, %eax
+90: 
+	popl %ebx
+	popl %esi
+	ret
+				
+#endif
+
+/*
+unsigned int csum_partial_copy_generic (const char *src, char *dst,
+				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ */ 
+
+/*
+ * Copy from ds while checksumming, otherwise like csum_partial
+ *
+ * The macros SRC and DST specify the type of access for the instruction.
+ * thus we can call a custom exception handler for all access types.
+ *
+ * FIXME: could someone double-check whether I haven't mixed up some SRC and
+ *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
+ *	  them all but there's no guarantee.
+ */
+
+#define SRC(y...)			\
+	9999: y;			\
+	.section __ex_table, "a";	\
+	.long 9999b, 6001f	;	\
+	.previous
+
+#define DST(y...)			\
+	9999: y;			\
+	.section __ex_table, "a";	\
+	.long 9999b, 6002f	;	\
+	.previous
+
+.align 4
+.globl csum_partial_copy_generic
+				
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+#define ARGBASE 16		
+#define FP		12
+		
+csum_partial_copy_generic:
+	subl  $4,%esp	
+	pushl %edi
+	pushl %esi
+	pushl %ebx
+	movl ARGBASE+16(%esp),%eax	# sum
+	movl ARGBASE+12(%esp),%ecx	# len
+	movl ARGBASE+4(%esp),%esi	# src
+	movl ARGBASE+8(%esp),%edi	# dst
+
+	testl $2, %edi			# Check alignment. 
+	jz 2f				# Jump if alignment is ok.
+	subl $2, %ecx			# Alignment uses up two bytes.
+	jae 1f				# Jump if we had at least two bytes.
+	addl $2, %ecx			# ecx was < 2.  Deal with it.
+	jmp 4f
+SRC(1:	movw (%esi), %bx	)
+	addl $2, %esi
+DST(	movw %bx, (%edi)	)
+	addl $2, %edi
+	addw %bx, %ax	
+	adcl $0, %eax
+2:
+	movl %ecx, FP(%esp)
+	shrl $5, %ecx
+	jz 2f
+	testl %esi, %esi
+SRC(1:	movl (%esi), %ebx	)
+SRC(	movl 4(%esi), %edx	)
+	adcl %ebx, %eax
+DST(	movl %ebx, (%edi)	)
+	adcl %edx, %eax
+DST(	movl %edx, 4(%edi)	)
+
+SRC(	movl 8(%esi), %ebx	)
+SRC(	movl 12(%esi), %edx	)
+	adcl %ebx, %eax
+DST(	movl %ebx, 8(%edi)	)
+	adcl %edx, %eax
+DST(	movl %edx, 12(%edi)	)
+
+SRC(	movl 16(%esi), %ebx 	)
+SRC(	movl 20(%esi), %edx	)
+	adcl %ebx, %eax
+DST(	movl %ebx, 16(%edi)	)
+	adcl %edx, %eax
+DST(	movl %edx, 20(%edi)	)
+
+SRC(	movl 24(%esi), %ebx	)
+SRC(	movl 28(%esi), %edx	)
+	adcl %ebx, %eax
+DST(	movl %ebx, 24(%edi)	)
+	adcl %edx, %eax
+DST(	movl %edx, 28(%edi)	)
+
+	lea 32(%esi), %esi
+	lea 32(%edi), %edi
+	dec %ecx
+	jne 1b
+	adcl $0, %eax
+2:	movl FP(%esp), %edx
+	movl %edx, %ecx
+	andl $0x1c, %edx
+	je 4f
+	shrl $2, %edx			# This clears CF
+SRC(3:	movl (%esi), %ebx	)
+	adcl %ebx, %eax
+DST(	movl %ebx, (%edi)	)
+	lea 4(%esi), %esi
+	lea 4(%edi), %edi
+	dec %edx
+	jne 3b
+	adcl $0, %eax
+4:	andl $3, %ecx
+	jz 7f
+	cmpl $2, %ecx
+	jb 5f
+SRC(	movw (%esi), %cx	)
+	leal 2(%esi), %esi
+DST(	movw %cx, (%edi)	)
+	leal 2(%edi), %edi
+	je 6f
+	shll $16,%ecx
+SRC(5:	movb (%esi), %cl	)
+DST(	movb %cl, (%edi)	)
+6:	addl %ecx, %eax
+	adcl $0, %eax
+7:
+5000:
+
+# Exception handler:
+.section .fixup, "ax"							
+
+6001:
+	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
+	movl $-EFAULT, (%ebx)
+
+	# zero the complete destination - computing the rest
+	# is too much work 
+	movl ARGBASE+8(%esp), %edi	# dst
+	movl ARGBASE+12(%esp), %ecx	# len
+	xorl %eax,%eax
+	rep ; stosb
+
+	jmp 5000b
+
+6002:
+	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
+	movl $-EFAULT,(%ebx)
+	jmp 5000b
+
+.previous
+
+	popl %ebx
+	popl %esi
+	popl %edi
+	popl %ecx			# equivalent to addl $4,%esp
+	ret	
+
+#else
+
+/* Version for PentiumII/PPro */
+
+#define ROUND1(x) \
+	SRC(movl x(%esi), %ebx	)	;	\
+	addl %ebx, %eax			;	\
+	DST(movl %ebx, x(%edi)	)	; 
+
+#define ROUND(x) \
+	SRC(movl x(%esi), %ebx	)	;	\
+	adcl %ebx, %eax			;	\
+	DST(movl %ebx, x(%edi)	)	;
+
+#define ARGBASE 12
+		
+csum_partial_copy_generic:
+	pushl %ebx
+	pushl %edi
+	pushl %esi
+	movl ARGBASE+4(%esp),%esi	#src
+	movl ARGBASE+8(%esp),%edi	#dst	
+	movl ARGBASE+12(%esp),%ecx	#len
+	movl ARGBASE+16(%esp),%eax	#sum
+#	movl %ecx, %edx  
+	movl %ecx, %ebx  
+	movl %esi, %edx
+	shrl $6, %ecx     
+	andl $0x3c, %ebx  
+	negl %ebx
+	subl %ebx, %esi  
+	subl %ebx, %edi  
+	lea  -1(%esi),%edx
+	andl $-32,%edx
+	lea 3f(%ebx,%ebx), %ebx
+	testl %esi, %esi 
+	jmp *%ebx
+1:	addl $64,%esi
+	addl $64,%edi 
+	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
+	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)	
+	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)	
+	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)	
+	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)	
+3:	adcl $0,%eax
+	addl $64, %edx
+	dec %ecx
+	jge 1b
+4:	movl ARGBASE+12(%esp),%edx	#len
+	andl $3, %edx
+	jz 7f
+	cmpl $2, %edx
+	jb 5f
+SRC(	movw (%esi), %dx         )
+	leal 2(%esi), %esi
+DST(	movw %dx, (%edi)         )
+	leal 2(%edi), %edi
+	je 6f
+	shll $16,%edx
+5:
+SRC(	movb (%esi), %dl         )
+DST(	movb %dl, (%edi)         )
+6:	addl %edx, %eax
+	adcl $0, %eax
+7:
+.section .fixup, "ax"
+6001:	movl	ARGBASE+20(%esp), %ebx	# src_err_ptr	
+	movl $-EFAULT, (%ebx)
+	# zero the complete destination (computing the rest is too much work)
+	movl ARGBASE+8(%esp),%edi	# dst
+	movl ARGBASE+12(%esp),%ecx	# len
+	xorl %eax,%eax
+	rep; stosb
+	jmp 7b
+6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
+	movl $-EFAULT, (%ebx)
+	jmp  7b			
+.previous				
+
+	popl %esi
+	popl %edi
+	popl %ebx
+	ret
+				
+#undef ROUND
+#undef ROUND1		
+		
+#endif
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
new file mode 100644
index 00000000000..ab43394dc77
--- /dev/null
+++ b/arch/i386/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
+/*
+ * x86 version of "atomic_dec_and_lock()" using
+ * the atomic "cmpxchg" instruction.
+ *
+ * (For CPU's lacking cmpxchg, we use the slow
+ * generic version, and this one never even gets
+ * compiled).
+ */
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+	int counter;
+	int newcount;
+
+repeat:
+	counter = atomic_read(atomic);
+	newcount = counter-1;
+
+	if (!newcount)
+		goto slow_path;
+
+	asm volatile("lock; cmpxchgl %1,%2"
+		:"=a" (newcount)
+		:"r" (newcount), "m" (atomic->counter), "0" (counter));
+
+	/* If the above failed, "eax" will have changed */
+	if (newcount != counter)
+		goto repeat;
+	return 0;
+
+slow_path:
+	spin_lock(lock);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock(lock);
+	return 0;
+}
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
new file mode 100644
index 00000000000..080639f262b
--- /dev/null
+++ b/arch/i386/lib/delay.c
@@ -0,0 +1,49 @@
+/*
+ *	Precise Delay Loops for i386
+ *
+ *	Copyright (C) 1993 Linus Torvalds
+ *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *	The __delay function must _NOT_ be inlined as its execution time
+ *	depends wildly on alignment on many x86 processors. The additional
+ *	jump magic is needed to get the timing stable on all the CPU's
+ *	we have to worry about.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/timer.h>
+
+#ifdef CONFIG_SMP
+#include <asm/smp.h>
+#endif
+
+extern struct timer_opts* timer;
+
+void __delay(unsigned long loops)
+{
+	cur_timer->delay(loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+	int d0;
+	xloops *= 4;
+	__asm__("mull %0"
+		:"=d" (xloops), "=&a" (d0)
+		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+        __delay(++xloops);
+}
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x000010c7);  /* 2**32 / 1000000 (rounded up) */
+}
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
+}
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
new file mode 100644
index 00000000000..62d7f178a32
--- /dev/null
+++ b/arch/i386/lib/getuser.S
@@ -0,0 +1,70 @@
+/*
+ * __get_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __get_user_X
+ *
+ * Inputs:	%eax contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *		%edx contains zero-extended value
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+.text
+.align 4
+.globl __get_user_1
+__get_user_1:
+	GET_THREAD_INFO(%edx)
+	cmpl TI_addr_limit(%edx),%eax
+	jae bad_get_user
+1:	movzbl (%eax),%edx
+	xorl %eax,%eax
+	ret
+
+.align 4
+.globl __get_user_2
+__get_user_2:
+	addl $1,%eax
+	jc bad_get_user
+	GET_THREAD_INFO(%edx)
+	cmpl TI_addr_limit(%edx),%eax
+	jae bad_get_user
+2:	movzwl -1(%eax),%edx
+	xorl %eax,%eax
+	ret
+
+.align 4
+.globl __get_user_4
+__get_user_4:
+	addl $3,%eax
+	jc bad_get_user
+	GET_THREAD_INFO(%edx)
+	cmpl TI_addr_limit(%edx),%eax
+	jae bad_get_user
+3:	movl -3(%eax),%edx
+	xorl %eax,%eax
+	ret
+
+bad_get_user:
+	xorl %edx,%edx
+	movl $-14,%eax
+	ret
+
+.section __ex_table,"a"
+	.long 1b,bad_get_user
+	.long 2b,bad_get_user
+	.long 3b,bad_get_user
+.previous
diff --git a/arch/i386/lib/memcpy.c b/arch/i386/lib/memcpy.c
new file mode 100644
index 00000000000..891b2359d18
--- /dev/null
+++ b/arch/i386/lib/memcpy.c
@@ -0,0 +1,44 @@
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef memcpy
+#undef memset
+
+void *memcpy(void *to, const void *from, size_t n)
+{
+#ifdef CONFIG_X86_USE_3DNOW
+	return __memcpy3d(to, from, n);
+#else
+	return __memcpy(to, from, n);
+#endif
+}
+EXPORT_SYMBOL(memcpy);
+
+void *memset(void *s, int c, size_t count)
+{
+	return __memset(s, c, count);
+}
+EXPORT_SYMBOL(memset);
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	int d0, d1, d2;
+
+	if (dest < src) {
+		memcpy(dest,src,n);
+	} else {
+		__asm__ __volatile__(
+			"std\n\t"
+			"rep\n\t"
+			"movsb\n\t"
+			"cld"
+			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
+			:"0" (n),
+			 "1" (n-1+(const char *)src),
+			 "2" (n-1+(char *)dest)
+			:"memory");
+	}
+	return dest;
+}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 00000000000..01f8b1a2cc8
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,399 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+
+#include <asm/i387.h>
+
+
+/*
+ *	MMX 3DNow! library helper functions
+ *
+ *	To do:
+ *	We can use MMX just for prefetch in IRQ's. This may be a win. 
+ *		(reported so on K6-III)
+ *	We should use a better code neutral filler for the short jump
+ *		leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
+ *	We also want to clobber the filler register so we don't get any
+ *		register forwarding stalls on the filler. 
+ *
+ *	Add *user handling. Checksums are not a win with MMX on any CPU
+ *	tested so far for any MMX solution figured.
+ *
+ *	22/09/2000 - Arjan van de Ven 
+ *		Improved for non-egineering-sample Athlons 
+ *
+ */
+ 
+void *_mmx_memcpy(void *to, const void *from, size_t len)
+{
+	void *p;
+	int i;
+
+	if (unlikely(in_interrupt()))
+		return __memcpy(to, from, len);
+
+	p = to;
+	i = len >> 6; /* len/64 */
+
+	kernel_fpu_begin();
+
+	__asm__ __volatile__ (
+		"1: prefetch (%0)\n"		/* This set is 28 bytes */
+		"   prefetch 64(%0)\n"
+		"   prefetch 128(%0)\n"
+		"   prefetch 192(%0)\n"
+		"   prefetch 256(%0)\n"
+		"2:  \n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+		
+	
+	for(; i>5; i--)
+	{
+		__asm__ __volatile__ (
+		"1:  prefetch 320(%0)\n"
+		"2:  movq (%0), %%mm0\n"
+		"  movq 8(%0), %%mm1\n"
+		"  movq 16(%0), %%mm2\n"
+		"  movq 24(%0), %%mm3\n"
+		"  movq %%mm0, (%1)\n"
+		"  movq %%mm1, 8(%1)\n"
+		"  movq %%mm2, 16(%1)\n"
+		"  movq %%mm3, 24(%1)\n"
+		"  movq 32(%0), %%mm0\n"
+		"  movq 40(%0), %%mm1\n"
+		"  movq 48(%0), %%mm2\n"
+		"  movq 56(%0), %%mm3\n"
+		"  movq %%mm0, 32(%1)\n"
+		"  movq %%mm1, 40(%1)\n"
+		"  movq %%mm2, 48(%1)\n"
+		"  movq %%mm3, 56(%1)\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+
+	for(; i>0; i--)
+	{
+		__asm__ __volatile__ (
+		"  movq (%0), %%mm0\n"
+		"  movq 8(%0), %%mm1\n"
+		"  movq 16(%0), %%mm2\n"
+		"  movq 24(%0), %%mm3\n"
+		"  movq %%mm0, (%1)\n"
+		"  movq %%mm1, 8(%1)\n"
+		"  movq %%mm2, 16(%1)\n"
+		"  movq %%mm3, 24(%1)\n"
+		"  movq 32(%0), %%mm0\n"
+		"  movq 40(%0), %%mm1\n"
+		"  movq 48(%0), %%mm2\n"
+		"  movq 56(%0), %%mm3\n"
+		"  movq %%mm0, 32(%1)\n"
+		"  movq %%mm1, 40(%1)\n"
+		"  movq %%mm2, 48(%1)\n"
+		"  movq %%mm3, 56(%1)\n"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	/*
+	 *	Now do the tail of the block
+	 */
+	__memcpy(to, from, len&63);
+	kernel_fpu_end();
+	return p;
+}
+
+#ifdef CONFIG_MK7
+
+/*
+ *	The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
+ *	other MMX using processors do not.
+ */
+
+static void fast_clear_page(void *page)
+{
+	int i;
+
+	kernel_fpu_begin();
+	
+	__asm__ __volatile__ (
+		"  pxor %%mm0, %%mm0\n" : :
+	);
+
+	for(i=0;i<4096/64;i++)
+	{
+		__asm__ __volatile__ (
+		"  movntq %%mm0, (%0)\n"
+		"  movntq %%mm0, 8(%0)\n"
+		"  movntq %%mm0, 16(%0)\n"
+		"  movntq %%mm0, 24(%0)\n"
+		"  movntq %%mm0, 32(%0)\n"
+		"  movntq %%mm0, 40(%0)\n"
+		"  movntq %%mm0, 48(%0)\n"
+		"  movntq %%mm0, 56(%0)\n"
+		: : "r" (page) : "memory");
+		page+=64;
+	}
+	/* since movntq is weakly-ordered, a "sfence" is needed to become
+	 * ordered again.
+	 */
+	__asm__ __volatile__ (
+		"  sfence \n" : :
+	);
+	kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+	int i;
+
+	kernel_fpu_begin();
+
+	/* maybe the prefetch stuff can go before the expensive fnsave...
+	 * but that is for later. -AV
+	 */
+	__asm__ __volatile__ (
+		"1: prefetch (%0)\n"
+		"   prefetch 64(%0)\n"
+		"   prefetch 128(%0)\n"
+		"   prefetch 192(%0)\n"
+		"   prefetch 256(%0)\n"
+		"2:  \n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	for(i=0; i<(4096-320)/64; i++)
+	{
+		__asm__ __volatile__ (
+		"1: prefetch 320(%0)\n"
+		"2: movq (%0), %%mm0\n"
+		"   movntq %%mm0, (%1)\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movntq %%mm1, 8(%1)\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movntq %%mm2, 16(%1)\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movntq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm4\n"
+		"   movntq %%mm4, 32(%1)\n"
+		"   movq 40(%0), %%mm5\n"
+		"   movntq %%mm5, 40(%1)\n"
+		"   movq 48(%0), %%mm6\n"
+		"   movntq %%mm6, 48(%1)\n"
+		"   movq 56(%0), %%mm7\n"
+		"   movntq %%mm7, 56(%1)\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	for(i=(4096-320)/64; i<4096/64; i++)
+	{
+		__asm__ __volatile__ (
+		"2: movq (%0), %%mm0\n"
+		"   movntq %%mm0, (%1)\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movntq %%mm1, 8(%1)\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movntq %%mm2, 16(%1)\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movntq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm4\n"
+		"   movntq %%mm4, 32(%1)\n"
+		"   movq 40(%0), %%mm5\n"
+		"   movntq %%mm5, 40(%1)\n"
+		"   movq 48(%0), %%mm6\n"
+		"   movntq %%mm6, 48(%1)\n"
+		"   movq 56(%0), %%mm7\n"
+		"   movntq %%mm7, 56(%1)\n"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	/* since movntq is weakly-ordered, a "sfence" is needed to become
+	 * ordered again.
+	 */
+	__asm__ __volatile__ (
+		"  sfence \n" : :
+	);
+	kernel_fpu_end();
+}
+
+#else
+
+/*
+ *	Generic MMX implementation without K7 specific streaming
+ */
+ 
+static void fast_clear_page(void *page)
+{
+	int i;
+	
+	kernel_fpu_begin();
+	
+	__asm__ __volatile__ (
+		"  pxor %%mm0, %%mm0\n" : :
+	);
+
+	for(i=0;i<4096/128;i++)
+	{
+		__asm__ __volatile__ (
+		"  movq %%mm0, (%0)\n"
+		"  movq %%mm0, 8(%0)\n"
+		"  movq %%mm0, 16(%0)\n"
+		"  movq %%mm0, 24(%0)\n"
+		"  movq %%mm0, 32(%0)\n"
+		"  movq %%mm0, 40(%0)\n"
+		"  movq %%mm0, 48(%0)\n"
+		"  movq %%mm0, 56(%0)\n"
+		"  movq %%mm0, 64(%0)\n"
+		"  movq %%mm0, 72(%0)\n"
+		"  movq %%mm0, 80(%0)\n"
+		"  movq %%mm0, 88(%0)\n"
+		"  movq %%mm0, 96(%0)\n"
+		"  movq %%mm0, 104(%0)\n"
+		"  movq %%mm0, 112(%0)\n"
+		"  movq %%mm0, 120(%0)\n"
+		: : "r" (page) : "memory");
+		page+=128;
+	}
+
+	kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+	int i;
+	
+	
+	kernel_fpu_begin();
+
+	__asm__ __volatile__ (
+		"1: prefetch (%0)\n"
+		"   prefetch 64(%0)\n"
+		"   prefetch 128(%0)\n"
+		"   prefetch 192(%0)\n"
+		"   prefetch 256(%0)\n"
+		"2:  \n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	for(i=0; i<4096/64; i++)
+	{
+		__asm__ __volatile__ (
+		"1: prefetch 320(%0)\n"
+		"2: movq (%0), %%mm0\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movq %%mm0, (%1)\n"
+		"   movq %%mm1, 8(%1)\n"
+		"   movq %%mm2, 16(%1)\n"
+		"   movq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm0\n"
+		"   movq 40(%0), %%mm1\n"
+		"   movq 48(%0), %%mm2\n"
+		"   movq 56(%0), %%mm3\n"
+		"   movq %%mm0, 32(%1)\n"
+		"   movq %%mm1, 40(%1)\n"
+		"   movq %%mm2, 48(%1)\n"
+		"   movq %%mm3, 56(%1)\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	kernel_fpu_end();
+}
+
+
+#endif
+
+/*
+ *	Favour MMX for page clear and copy. 
+ */
+
+static void slow_zero_page(void * page)
+{
+	int d0, d1;
+	__asm__ __volatile__( \
+		"cld\n\t" \
+		"rep ; stosl" \
+		: "=&c" (d0), "=&D" (d1)
+		:"a" (0),"1" (page),"0" (1024)
+		:"memory");
+}
+ 
+void mmx_clear_page(void * page)
+{
+	if(unlikely(in_interrupt()))
+		slow_zero_page(page);
+	else
+		fast_clear_page(page);
+}
+
+static void slow_copy_page(void *to, void *from)
+{
+	int d0, d1, d2;
+	__asm__ __volatile__( \
+		"cld\n\t" \
+		"rep ; movsl" \
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+		: "0" (1024),"1" ((long) to),"2" ((long) from) \
+		: "memory");
+}
+  
+
+void mmx_copy_page(void *to, void *from)
+{
+	if(unlikely(in_interrupt()))
+		slow_copy_page(to, from);
+	else
+		fast_copy_page(to, from);
+}
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
new file mode 100644
index 00000000000..a32d9f570f4
--- /dev/null
+++ b/arch/i386/lib/putuser.S
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $1,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+2:	movw %ax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $3,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $7,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+4:	movl %eax,(%ecx)
+5:	movl %edx,4(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+	.long 5b,bad_put_user
+.previous
diff --git a/arch/i386/lib/strstr.c b/arch/i386/lib/strstr.c
new file mode 100644
index 00000000000..a3dafbf59da
--- /dev/null
+++ b/arch/i386/lib/strstr.c
@@ -0,0 +1,31 @@
+#include <linux/string.h>
+
+char * strstr(const char * cs,const char * ct)
+{
+int	d0, d1;
+register char * __res;
+__asm__ __volatile__(
+	"movl %6,%%edi\n\t"
+	"repne\n\t"
+	"scasb\n\t"
+	"notl %%ecx\n\t"
+	"decl %%ecx\n\t"	/* NOTE! This also sets Z if searchstring='' */
+	"movl %%ecx,%%edx\n"
+	"1:\tmovl %6,%%edi\n\t"
+	"movl %%esi,%%eax\n\t"
+	"movl %%edx,%%ecx\n\t"
+	"repe\n\t"
+	"cmpsb\n\t"
+	"je 2f\n\t"		/* also works for empty string, see above */
+	"xchgl %%eax,%%esi\n\t"
+	"incl %%esi\n\t"
+	"cmpb $0,-1(%%eax)\n\t"
+	"jne 1b\n\t"
+	"xorl %%eax,%%eax\n\t"
+	"2:"
+	:"=a" (__res), "=&c" (d0), "=&S" (d1)
+	:"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+	:"dx", "di");
+return __res;
+}
+
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
new file mode 100644
index 00000000000..51aa2bbb026
--- /dev/null
+++ b/arch/i386/lib/usercopy.c
@@ -0,0 +1,636 @@
+/* 
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/mmx.h>
+
+static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
+{
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
+		return 0;
+#endif
+	return 1;
+}
+#define movsl_is_ok(a1,a2,n) \
+	__movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res)			   \
+do {									   \
+	int __d0, __d1, __d2;						   \
+	might_sleep();							   \
+	__asm__ __volatile__(						   \
+		"	testl %1,%1\n"					   \
+		"	jz 2f\n"					   \
+		"0:	lodsb\n"					   \
+		"	stosb\n"					   \
+		"	testb %%al,%%al\n"				   \
+		"	jz 1f\n"					   \
+		"	decl %1\n"					   \
+		"	jnz 0b\n"					   \
+		"1:	subl %1,%0\n"					   \
+		"2:\n"							   \
+		".section .fixup,\"ax\"\n"				   \
+		"3:	movl %5,%0\n"					   \
+		"	jmp 2b\n"					   \
+		".previous\n"						   \
+		".section __ex_table,\"a\"\n"				   \
+		"	.align 4\n"					   \
+		"	.long 0b,3b\n"					   \
+		".previous"						   \
+		: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
+		  "=&D" (__d2)						   \
+		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+		: "memory");						   \
+} while (0)
+
+/**
+ * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ * 
+ * Copies a NUL-terminated string from userspace to kernel space.
+ * Caller must check the specified block with access_ok() before calling
+ * this function.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+long
+__strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	long res;
+	__do_strncpy_from_user(dst, src, count, res);
+	return res;
+}
+
+/**
+ * strncpy_from_user: - Copy a NUL terminated string from userspace.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ * 
+ * Copies a NUL-terminated string from userspace to kernel space.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	long res = -EFAULT;
+	if (access_ok(VERIFY_READ, src, 1))
+		__do_strncpy_from_user(dst, src, count, res);
+	return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size)					\
+do {									\
+	int __d0;							\
+	might_sleep();							\
+  	__asm__ __volatile__(						\
+		"0:	rep; stosl\n"					\
+		"	movl %2,%0\n"					\
+		"1:	rep; stosb\n"					\
+		"2:\n"							\
+		".section .fixup,\"ax\"\n"				\
+		"3:	lea 0(%2,%0,4),%0\n"				\
+		"	jmp 2b\n"					\
+		".previous\n"						\
+		".section __ex_table,\"a\"\n"				\
+		"	.align 4\n"					\
+		"	.long 0b,3b\n"					\
+		"	.long 1b,2b\n"					\
+		".previous"						\
+		: "=&c"(size), "=&D" (__d0)				\
+		: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));	\
+} while (0)
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+	might_sleep();
+	if (access_ok(VERIFY_WRITE, to, n))
+		__do_clear_user(to, n);
+	return n;
+}
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+unsigned long
+__clear_user(void __user *to, unsigned long n)
+{
+	__do_clear_user(to, n);
+	return n;
+}
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @s: The string to measure.
+ * @n: The maximum valid length
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than @n.
+ */
+long strnlen_user(const char __user *s, long n)
+{
+	unsigned long mask = -__addr_ok(s);
+	unsigned long res, tmp;
+
+	might_sleep();
+
+	__asm__ __volatile__(
+		"	testl %0, %0\n"
+		"	jz 3f\n"
+		"	andl %0,%%ecx\n"
+		"0:	repne; scasb\n"
+		"	setne %%al\n"
+		"	subl %%ecx,%0\n"
+		"	addl %0,%%eax\n"
+		"1:\n"
+		".section .fixup,\"ax\"\n"
+		"2:	xorl %%eax,%%eax\n"
+		"	jmp 1b\n"
+		"3:	movb $1,%%al\n"
+		"	jmp 1b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 0b,2b\n"
+		".previous"
+		:"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+		:"0" (n), "1" (s), "2" (0), "3" (mask)
+		:"cc");
+	return res & mask;
+}
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+static unsigned long
+__copy_user_intel(void __user *to, const void *from, unsigned long size)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		       "       .align 2,0x90\n"
+		       "1:     movl 32(%4), %%eax\n"
+		       "       cmpl $67, %0\n"
+		       "       jbe 3f\n"
+		       "2:     movl 64(%4), %%eax\n"
+		       "       .align 2,0x90\n"
+		       "3:     movl 0(%4), %%eax\n"
+		       "4:     movl 4(%4), %%edx\n"
+		       "5:     movl %%eax, 0(%3)\n"
+		       "6:     movl %%edx, 4(%3)\n"
+		       "7:     movl 8(%4), %%eax\n"
+		       "8:     movl 12(%4),%%edx\n"
+		       "9:     movl %%eax, 8(%3)\n"
+		       "10:    movl %%edx, 12(%3)\n"
+		       "11:    movl 16(%4), %%eax\n"
+		       "12:    movl 20(%4), %%edx\n"
+		       "13:    movl %%eax, 16(%3)\n"
+		       "14:    movl %%edx, 20(%3)\n"
+		       "15:    movl 24(%4), %%eax\n"
+		       "16:    movl 28(%4), %%edx\n"
+		       "17:    movl %%eax, 24(%3)\n"
+		       "18:    movl %%edx, 28(%3)\n"
+		       "19:    movl 32(%4), %%eax\n"
+		       "20:    movl 36(%4), %%edx\n"
+		       "21:    movl %%eax, 32(%3)\n"
+		       "22:    movl %%edx, 36(%3)\n"
+		       "23:    movl 40(%4), %%eax\n"
+		       "24:    movl 44(%4), %%edx\n"
+		       "25:    movl %%eax, 40(%3)\n"
+		       "26:    movl %%edx, 44(%3)\n"
+		       "27:    movl 48(%4), %%eax\n"
+		       "28:    movl 52(%4), %%edx\n"
+		       "29:    movl %%eax, 48(%3)\n"
+		       "30:    movl %%edx, 52(%3)\n"
+		       "31:    movl 56(%4), %%eax\n"
+		       "32:    movl 60(%4), %%edx\n"
+		       "33:    movl %%eax, 56(%3)\n"
+		       "34:    movl %%edx, 60(%3)\n"
+		       "       addl $-64, %0\n"
+		       "       addl $64, %4\n"
+		       "       addl $64, %3\n"
+		       "       cmpl $63, %0\n"
+		       "       ja  1b\n"
+		       "35:    movl  %0, %%eax\n"
+		       "       shrl  $2, %0\n"
+		       "       andl  $3, %%eax\n"
+		       "       cld\n"
+		       "99:    rep; movsl\n"
+		       "36:    movl %%eax, %0\n"
+		       "37:    rep; movsb\n"
+		       "100:\n"
+		       ".section .fixup,\"ax\"\n"
+		       "101:   lea 0(%%eax,%0,4),%0\n"
+		       "       jmp 100b\n"
+		       ".previous\n"
+		       ".section __ex_table,\"a\"\n"
+		       "       .align 4\n"
+		       "       .long 1b,100b\n"
+		       "       .long 2b,100b\n"
+		       "       .long 3b,100b\n"
+		       "       .long 4b,100b\n"
+		       "       .long 5b,100b\n"
+		       "       .long 6b,100b\n"
+		       "       .long 7b,100b\n"
+		       "       .long 8b,100b\n"
+		       "       .long 9b,100b\n"
+		       "       .long 10b,100b\n"
+		       "       .long 11b,100b\n"
+		       "       .long 12b,100b\n"
+		       "       .long 13b,100b\n"
+		       "       .long 14b,100b\n"
+		       "       .long 15b,100b\n"
+		       "       .long 16b,100b\n"
+		       "       .long 17b,100b\n"
+		       "       .long 18b,100b\n"
+		       "       .long 19b,100b\n"
+		       "       .long 20b,100b\n"
+		       "       .long 21b,100b\n"
+		       "       .long 22b,100b\n"
+		       "       .long 23b,100b\n"
+		       "       .long 24b,100b\n"
+		       "       .long 25b,100b\n"
+		       "       .long 26b,100b\n"
+		       "       .long 27b,100b\n"
+		       "       .long 28b,100b\n"
+		       "       .long 29b,100b\n"
+		       "       .long 30b,100b\n"
+		       "       .long 31b,100b\n"
+		       "       .long 32b,100b\n"
+		       "       .long 33b,100b\n"
+		       "       .long 34b,100b\n"
+		       "       .long 35b,100b\n"
+		       "       .long 36b,100b\n"
+		       "       .long 37b,100b\n"
+		       "       .long 99b,101b\n"
+		       ".previous"
+		       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+		       :  "1"(to), "2"(from), "0"(size)
+		       : "eax", "edx", "memory");
+	return size;
+}
+
+static unsigned long
+__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		       "        .align 2,0x90\n"
+		       "0:      movl 32(%4), %%eax\n"
+		       "        cmpl $67, %0\n"      
+		       "        jbe 2f\n"            
+		       "1:      movl 64(%4), %%eax\n"
+		       "        .align 2,0x90\n"     
+		       "2:      movl 0(%4), %%eax\n" 
+		       "21:     movl 4(%4), %%edx\n" 
+		       "        movl %%eax, 0(%3)\n" 
+		       "        movl %%edx, 4(%3)\n" 
+		       "3:      movl 8(%4), %%eax\n" 
+		       "31:     movl 12(%4),%%edx\n" 
+		       "        movl %%eax, 8(%3)\n" 
+		       "        movl %%edx, 12(%3)\n"
+		       "4:      movl 16(%4), %%eax\n"
+		       "41:     movl 20(%4), %%edx\n"
+		       "        movl %%eax, 16(%3)\n"
+		       "        movl %%edx, 20(%3)\n"
+		       "10:     movl 24(%4), %%eax\n"
+		       "51:     movl 28(%4), %%edx\n"
+		       "        movl %%eax, 24(%3)\n"
+		       "        movl %%edx, 28(%3)\n"
+		       "11:     movl 32(%4), %%eax\n"
+		       "61:     movl 36(%4), %%edx\n"
+		       "        movl %%eax, 32(%3)\n"
+		       "        movl %%edx, 36(%3)\n"
+		       "12:     movl 40(%4), %%eax\n"
+		       "71:     movl 44(%4), %%edx\n"
+		       "        movl %%eax, 40(%3)\n"
+		       "        movl %%edx, 44(%3)\n"
+		       "13:     movl 48(%4), %%eax\n"
+		       "81:     movl 52(%4), %%edx\n"
+		       "        movl %%eax, 48(%3)\n"
+		       "        movl %%edx, 52(%3)\n"
+		       "14:     movl 56(%4), %%eax\n"
+		       "91:     movl 60(%4), %%edx\n"
+		       "        movl %%eax, 56(%3)\n"
+		       "        movl %%edx, 60(%3)\n"
+		       "        addl $-64, %0\n"     
+		       "        addl $64, %4\n"      
+		       "        addl $64, %3\n"      
+		       "        cmpl $63, %0\n"      
+		       "        ja  0b\n"            
+		       "5:      movl  %0, %%eax\n"   
+		       "        shrl  $2, %0\n"      
+		       "        andl $3, %%eax\n"    
+		       "        cld\n"               
+		       "6:      rep; movsl\n"   
+		       "        movl %%eax,%0\n"
+		       "7:      rep; movsb\n"	
+		       "8:\n"			
+		       ".section .fixup,\"ax\"\n"
+		       "9:      lea 0(%%eax,%0,4),%0\n"	
+		       "16:     pushl %0\n"	
+		       "        pushl %%eax\n"	
+		       "        xorl %%eax,%%eax\n"
+		       "        rep; stosb\n"	
+		       "        popl %%eax\n"	
+		       "        popl %0\n"	
+		       "        jmp 8b\n"	
+		       ".previous\n"		
+		       ".section __ex_table,\"a\"\n"
+		       "	.align 4\n"	   
+		       "	.long 0b,16b\n"	 
+		       "	.long 1b,16b\n"
+		       "	.long 2b,16b\n"
+		       "	.long 21b,16b\n"
+		       "	.long 3b,16b\n"	
+		       "	.long 31b,16b\n"
+		       "	.long 4b,16b\n"	
+		       "	.long 41b,16b\n"
+		       "	.long 10b,16b\n"
+		       "	.long 51b,16b\n"
+		       "	.long 11b,16b\n"
+		       "	.long 61b,16b\n"
+		       "	.long 12b,16b\n"
+		       "	.long 71b,16b\n"
+		       "	.long 13b,16b\n"
+		       "	.long 81b,16b\n"
+		       "	.long 14b,16b\n"
+		       "	.long 91b,16b\n"
+		       "	.long 6b,9b\n"	
+		       "        .long 7b,16b\n" 
+		       ".previous"		
+		       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+		       :  "1"(to), "2"(from), "0"(size)
+		       : "eax", "edx", "memory");
+	return size;
+}
+#else
+/*
+ * Leave these declared but undefined.  They should not be any references to
+ * them
+ */
+unsigned long
+__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
+unsigned long
+__copy_user_intel(void __user *to, const void *from, unsigned long size);
+#endif /* CONFIG_X86_INTEL_USERCOPY */
+
+/* Generic arbitrary sized copy.  */
+#define __copy_user(to,from,size)					\
+do {									\
+	int __d0, __d1, __d2;						\
+	__asm__ __volatile__(						\
+		"	cmp  $7,%0\n"					\
+		"	jbe  1f\n"					\
+		"	movl %1,%0\n"					\
+		"	negl %0\n"					\
+		"	andl $7,%0\n"					\
+		"	subl %0,%3\n"					\
+		"4:	rep; movsb\n"					\
+		"	movl %3,%0\n"					\
+		"	shrl $2,%0\n"					\
+		"	andl $3,%3\n"					\
+		"	.align 2,0x90\n"				\
+		"0:	rep; movsl\n"					\
+		"	movl %3,%0\n"					\
+		"1:	rep; movsb\n"					\
+		"2:\n"							\
+		".section .fixup,\"ax\"\n"				\
+		"5:	addl %3,%0\n"					\
+		"	jmp 2b\n"					\
+		"3:	lea 0(%3,%0,4),%0\n"				\
+		"	jmp 2b\n"					\
+		".previous\n"						\
+		".section __ex_table,\"a\"\n"				\
+		"	.align 4\n"					\
+		"	.long 4b,5b\n"					\
+		"	.long 0b,3b\n"					\
+		"	.long 1b,2b\n"					\
+		".previous"						\
+		: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)	\
+		: "3"(size), "0"(size), "1"(to), "2"(from)		\
+		: "memory");						\
+} while (0)
+
+#define __copy_user_zeroing(to,from,size)				\
+do {									\
+	int __d0, __d1, __d2;						\
+	__asm__ __volatile__(						\
+		"	cmp  $7,%0\n"					\
+		"	jbe  1f\n"					\
+		"	movl %1,%0\n"					\
+		"	negl %0\n"					\
+		"	andl $7,%0\n"					\
+		"	subl %0,%3\n"					\
+		"4:	rep; movsb\n"					\
+		"	movl %3,%0\n"					\
+		"	shrl $2,%0\n"					\
+		"	andl $3,%3\n"					\
+		"	.align 2,0x90\n"				\
+		"0:	rep; movsl\n"					\
+		"	movl %3,%0\n"					\
+		"1:	rep; movsb\n"					\
+		"2:\n"							\
+		".section .fixup,\"ax\"\n"				\
+		"5:	addl %3,%0\n"					\
+		"	jmp 6f\n"					\
+		"3:	lea 0(%3,%0,4),%0\n"				\
+		"6:	pushl %0\n"					\
+		"	pushl %%eax\n"					\
+		"	xorl %%eax,%%eax\n"				\
+		"	rep; stosb\n"					\
+		"	popl %%eax\n"					\
+		"	popl %0\n"					\
+		"	jmp 2b\n"					\
+		".previous\n"						\
+		".section __ex_table,\"a\"\n"				\
+		"	.align 4\n"					\
+		"	.long 4b,5b\n"					\
+		"	.long 0b,3b\n"					\
+		"	.long 1b,6b\n"					\
+		".previous"						\
+		: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)	\
+		: "3"(size), "0"(size), "1"(to), "2"(from)		\
+		: "memory");						\
+} while (0)
+
+
+unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
+{
+	BUG_ON((long) n < 0);
+#ifndef CONFIG_X86_WP_WORKS_OK
+	if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
+			((unsigned long )to) < TASK_SIZE) {
+		/* 
+		 * CPU does not honor the WP bit when writing
+		 * from supervisory mode, and due to preemption or SMP,
+		 * the page tables can change at any time.
+		 * Do it manually.	Manfred <manfred@colorfullife.com>
+		 */
+		while (n) {
+		      	unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
+			unsigned long len = PAGE_SIZE - offset;
+			int retval;
+			struct page *pg;
+			void *maddr;
+			
+			if (len > n)
+				len = n;
+
+survive:
+			down_read(&current->mm->mmap_sem);
+			retval = get_user_pages(current, current->mm,
+					(unsigned long )to, 1, 1, 0, &pg, NULL);
+
+			if (retval == -ENOMEM && current->pid == 1) {
+				up_read(&current->mm->mmap_sem);
+				blk_congestion_wait(WRITE, HZ/50);
+				goto survive;
+			}
+
+			if (retval != 1) {
+				up_read(&current->mm->mmap_sem);
+		       		break;
+		       	}
+
+			maddr = kmap_atomic(pg, KM_USER0);
+			memcpy(maddr + offset, from, len);
+			kunmap_atomic(maddr, KM_USER0);
+			set_page_dirty_lock(pg);
+			put_page(pg);
+			up_read(&current->mm->mmap_sem);
+
+			from += len;
+			to += len;
+			n -= len;
+		}
+		return n;
+	}
+#endif
+	if (movsl_is_ok(to, from, n))
+		__copy_user(to, from, n);
+	else
+		n = __copy_user_intel(to, from, n);
+	return n;
+}
+
+unsigned long
+__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+{
+	BUG_ON((long)n < 0);
+	if (movsl_is_ok(to, from, n))
+		__copy_user_zeroing(to, from, n);
+	else
+		n = __copy_user_zeroing_intel(to, from, n);
+	return n;
+}
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_sleep();
+	BUG_ON((long) n < 0);
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+EXPORT_SYMBOL(copy_to_user);
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	might_sleep();
+	BUG_ON((long) n < 0);
+	if (access_ok(VERIFY_READ, from, n))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+EXPORT_SYMBOL(copy_from_user);
diff --git a/arch/i386/mach-default/Makefile b/arch/i386/mach-default/Makefile
new file mode 100644
index 00000000000..e95bb023792
--- /dev/null
+++ b/arch/i386/mach-default/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y				:= setup.o topology.o
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
new file mode 100644
index 00000000000..0aa08eaa893
--- /dev/null
+++ b/arch/i386/mach-default/setup.c
@@ -0,0 +1,106 @@
+/*
+ *	Machine specific setup for generic
+ */
+
+#include <linux/config.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/arch_hooks.h>
+
+/**
+ * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+void __init pre_intr_init_hook(void)
+{
+	init_ISA_irqs();
+}
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
+
+/**
+ * intr_init_hook - post gate setup interrupt initialisation
+ *
+ * Description:
+ *	Fill in any interrupts that may have been left out by the general
+ *	init_IRQ() routine.  interrupts having to do with the machine rather
+ *	than the devices on the I/O bus (like APIC interrupts in intel MP
+ *	systems) are started here.
+ **/
+void __init intr_init_hook(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	apic_intr_init();
+#endif
+
+	if (!acpi_ioapic)
+		setup_irq(2, &irq2);
+}
+
+/**
+ * pre_setup_arch_hook - hook called prior to any setup_arch() execution
+ *
+ * Description:
+ *	generally used to activate any machine specific identification
+ *	routines that may be needed before setup_arch() runs.  On VISWS
+ *	this is used to get the board revision and type.
+ **/
+void __init pre_setup_arch_hook(void)
+{
+}
+
+/**
+ * trap_init_hook - initialise system specific traps
+ *
+ * Description:
+ *	Called as the final act of trap_init().  Used in VISWS to initialise
+ *	the various board specific APIC traps.
+ **/
+void __init trap_init_hook(void)
+{
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
+
+/**
+ * time_init_hook - do any specific initialisations for the system timer.
+ *
+ * Description:
+ *	Must plug the system timer interrupt source at HZ into the IRQ listed
+ *	in irq_vectors.h:TIMER_IRQ
+ **/
+void __init time_init_hook(void)
+{
+	setup_irq(0, &irq0);
+}
+
+#ifdef CONFIG_MCA
+/**
+ * mca_nmi_hook - hook into MCA specific NMI chain
+ *
+ * Description:
+ *	The MCA (Microchannel Arcitecture) has an NMI chain for NMI sources
+ *	along the MCA bus.  Use this to hook into that chain if you will need
+ *	it.
+ **/
+void __init mca_nmi_hook(void)
+{
+	/* If I recall correctly, there's a whole bunch of other things that
+	 * we can do to check for NMI problems, but that's all I know about
+	 * at the moment.
+	 */
+
+	printk("NMI generated from unknown source!\n");
+}
+#endif
diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c
new file mode 100644
index 00000000000..5b3e8817dae
--- /dev/null
+++ b/arch/i386/mach-default/topology.c
@@ -0,0 +1,98 @@
+/*
+ * arch/i386/mach-generic/topology.c - Populate driverfs with topology information
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/nodemask.h>
+#include <asm/cpu.h>
+
+static struct i386_cpu cpu_devices[NR_CPUS];
+
+int arch_register_cpu(int num){
+	struct node *parent = NULL;
+	
+#ifdef CONFIG_NUMA
+	int node = cpu_to_node(num);
+	if (node_online(node))
+		parent = &node_devices[node].node;
+#endif /* CONFIG_NUMA */
+
+	return register_cpu(&cpu_devices[num].cpu, num, parent);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void arch_unregister_cpu(int num) {
+	struct node *parent = NULL;
+
+#ifdef CONFIG_NUMA
+	int node = cpu_to_node(num);
+	if (node_online(node))
+		parent = &node_devices[node].node;
+#endif /* CONFIG_NUMA */
+
+	return unregister_cpu(&cpu_devices[num].cpu, parent);
+}
+EXPORT_SYMBOL(arch_register_cpu);
+EXPORT_SYMBOL(arch_unregister_cpu);
+#endif /*CONFIG_HOTPLUG_CPU*/
+
+
+
+#ifdef CONFIG_NUMA
+#include <linux/mmzone.h>
+#include <asm/node.h>
+
+struct i386_node node_devices[MAX_NUMNODES];
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (node_online(i))
+			arch_register_node(i);
+	}
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_possible(i)) arch_register_cpu(i);
+	return 0;
+}
+
+#else /* !CONFIG_NUMA */
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_possible(i)) arch_register_cpu(i);
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+subsys_initcall(topology_init);
diff --git a/arch/i386/mach-es7000/Makefile b/arch/i386/mach-es7000/Makefile
new file mode 100644
index 00000000000..69dd4da218d
--- /dev/null
+++ b/arch/i386/mach-es7000/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-$(CONFIG_X86_ES7000)	:= es7000plat.o
+obj-$(CONFIG_X86_GENERICARCH)	:= es7000plat.o
diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h
new file mode 100644
index 00000000000..70691f0c4ce
--- /dev/null
+++ b/arch/i386/mach-es7000/es7000.h
@@ -0,0 +1,110 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ *             Natalie Protasevich, Unisys Corporation
+ * This file contains the code to configure and interface 
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting 
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#define	MIP_REG			1
+#define	MIP_PSAI_REG		4
+
+#define	MIP_BUSY		1
+#define	MIP_SPIN		0xf0000
+#define	MIP_VALID		0x0100000000000000ULL
+#define	MIP_PORT(VALUE)	((VALUE >> 32) & 0xffff)
+
+#define	MIP_RD_LO(VALUE)	(VALUE & 0xffffffff)   
+
+struct mip_reg_info {
+	unsigned long long mip_info;
+	unsigned long long delivery_info;
+	unsigned long long host_reg;
+	unsigned long long mip_reg;
+};
+
+struct part_info {
+	unsigned char type;   
+	unsigned char length;
+	unsigned char part_id;
+	unsigned char apic_mode;
+	unsigned long snum;    
+	char ptype[16];
+	char sname[64];
+	char pname[64];
+};
+
+struct psai {
+	unsigned long long entry_type;
+	unsigned long long addr;
+	unsigned long long bep_addr;
+};
+
+struct es7000_mem_info {
+	unsigned char type;   
+	unsigned char length;
+	unsigned char resv[6];
+	unsigned long long  start; 
+	unsigned long long  size; 
+};
+
+struct es7000_oem_table {
+	unsigned long long hdr;
+	struct mip_reg_info mip;
+	struct part_info pif;
+	struct es7000_mem_info shm;
+	struct psai psai;
+};
+
+struct acpi_table_sdt {
+	unsigned long pa;
+	unsigned long count;
+	struct {
+		unsigned long pa;
+		enum acpi_table_id id;
+		unsigned long size;
+	}	entry[50];
+};
+
+struct oem_table {
+	struct acpi_table_header Header;
+	u32 OEMTableAddr;
+	u32 OEMTableSize;
+};
+
+struct mip_reg {
+	unsigned long long off_0;
+	unsigned long long off_8;
+	unsigned long long off_10;
+	unsigned long long off_18;
+	unsigned long long off_20;
+	unsigned long long off_28;
+	unsigned long long off_30;
+	unsigned long long off_38;
+};
+
+#define	MIP_SW_APIC		0x1020b
+#define	MIP_FUNC(VALUE) 	(VALUE & 0xff)
+
+extern int parse_unisys_oem (char *oemptr, int oem_entries);
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length);
+extern int es7000_start_cpu(int cpu, unsigned long eip);
+extern void es7000_sw_apic(void);
diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c
new file mode 100644
index 00000000000..d5936d50047
--- /dev/null
+++ b/arch/i386/mach-es7000/es7000plat.c
@@ -0,0 +1,316 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ *             Natalie Protasevich, Unisys Corporation
+ * This file contains the code to configure and interface
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/apicdef.h>
+#include "es7000.h"
+
+/*
+ * ES7000 Globals
+ */
+
+volatile unsigned long	*psai = NULL;
+struct mip_reg		*mip_reg;
+struct mip_reg		*host_reg;
+int 			mip_port;
+unsigned long		mip_addr, host_addr;
+
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
+
+/*
+ * GSI override for ES7000 platforms.
+ */
+
+static unsigned int base;
+
+static int
+es7000_rename_gsi(int ioapic, int gsi)
+{
+	if (!base) {
+		int i;
+		for (i = 0; i < nr_ioapics; i++)
+			base += nr_ioapic_registers[i];
+	}
+
+	if (!ioapic && (gsi < 16)) 
+		gsi += base;
+	return gsi;
+}
+
+#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)
+
+/*
+ * Parse the OEM Table
+ */
+
+int __init
+parse_unisys_oem (char *oemptr, int oem_entries)
+{
+	int                     i;
+	int 			success = 0;
+	unsigned char           type, size;
+	unsigned long           val;
+	char                    *tp = NULL;
+	struct psai             *psaip = NULL;
+	struct mip_reg_info 	*mi;
+	struct mip_reg		*host, *mip;
+
+	tp = oemptr;
+
+	tp += 8;
+
+	for (i=0; i <= oem_entries; i++) {
+		type = *tp++;
+		size = *tp++;
+		tp -= 2;
+		switch (type) {
+		case MIP_REG:
+			mi = (struct mip_reg_info *)tp;
+			val = MIP_RD_LO(mi->host_reg);
+			host_addr = val;
+			host = (struct mip_reg *)val;
+			host_reg = __va(host);
+			val = MIP_RD_LO(mi->mip_reg);
+			mip_port = MIP_PORT(mi->mip_info);
+			mip_addr = val;
+			mip = (struct mip_reg *)val;
+			mip_reg = __va(mip);
+			Dprintk("es7000_mipcfg: host_reg = 0x%lx \n",
+				(unsigned long)host_reg);
+			Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n",
+				(unsigned long)mip_reg);
+			success++;
+			break;
+		case MIP_PSAI_REG:
+			psaip = (struct psai *)tp;
+			if (tp != NULL) {
+				if (psaip->addr)
+					psai = __va(psaip->addr);
+				else
+					psai = NULL;
+				success++;
+			}
+			break;
+		default:
+			break;
+		}
+		if (i == 6) break;
+		tp += size;
+	}
+
+	if (success < 2) {
+		es7000_plat = 0;
+	} else {
+		printk("\nEnabling ES7000 specific features...\n");
+		/*
+		 * Determine the generation of the ES7000 currently running.
+		 *
+		 * es7000_plat = 0 if the machine is NOT a Unisys ES7000 box
+		 * es7000_plat = 1 if the machine is a 5xx ES7000 box
+		 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
+		 *
+		 */
+		if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
+			es7000_plat = 2;
+		else
+			es7000_plat = 1;
+
+		ioapic_renumber_irq = es7000_rename_gsi;
+	}
+	return es7000_plat;
+}
+
+int __init
+find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length)
+{
+	struct acpi_table_rsdp		*rsdp = NULL;
+	unsigned long			rsdp_phys = 0;
+	struct acpi_table_header 	*header = NULL;
+	int				i;
+	struct acpi_table_sdt		sdt;
+
+	rsdp_phys = acpi_find_rsdp();
+	rsdp = __va(rsdp_phys);
+	if (rsdp->rsdt_address) {
+		struct acpi_table_rsdt	*mapped_rsdt = NULL;
+		sdt.pa = rsdp->rsdt_address;
+
+		header = (struct acpi_table_header *)
+			__acpi_map_table(sdt.pa, sizeof(struct acpi_table_header));
+		if (!header)
+			return -ENODEV;
+
+		sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3;
+		mapped_rsdt = (struct acpi_table_rsdt *)
+			__acpi_map_table(sdt.pa, header->length);
+		if (!mapped_rsdt)
+			return -ENODEV;
+
+		header = &mapped_rsdt->header;
+
+		for (i = 0; i < sdt.count; i++)
+			sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
+	};
+	for (i = 0; i < sdt.count; i++) {
+
+		header = (struct acpi_table_header *)
+			__acpi_map_table(sdt.entry[i].pa,
+				sizeof(struct acpi_table_header));
+		if (!header)
+			continue;
+		if (!strncmp((char *) &header->signature, "OEM1", 4)) {
+			if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) {
+				void *addr;
+				struct oem_table *t;
+				acpi_table_print(header, sdt.entry[i].pa);
+				t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
+				addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
+				*length = header->length;
+				*oem_addr = (unsigned long) addr;
+				return 0;
+			}
+		}
+	}
+	Dprintk("ES7000: did not find Unisys ACPI OEM table!\n");
+	return -1;
+}
+
+static void
+es7000_spin(int n)
+{
+	int i = 0;
+
+	while (i++ < n)
+		rep_nop();
+}
+
+static int __init
+es7000_mip_write(struct mip_reg *mip_reg)
+{
+	int			status = 0;
+	int			spin;
+
+	spin = MIP_SPIN;
+	while (((unsigned long long)host_reg->off_38 &
+		(unsigned long long)MIP_VALID) != 0) {
+			if (--spin <= 0) {
+				printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
+				return -1;
+			}
+		es7000_spin(MIP_SPIN);
+	}
+
+	memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
+	outb(1, mip_port);
+
+	spin = MIP_SPIN;
+
+	while (((unsigned long long)mip_reg->off_38 &
+		(unsigned long long)MIP_VALID) == 0) {
+		if (--spin <= 0) {
+			printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
+			return -1;
+		}
+		es7000_spin(MIP_SPIN);
+	}
+
+	status = ((unsigned long long)mip_reg->off_0 &
+		(unsigned long long)0xffff0000000000ULL) >> 48;
+	mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
+		(unsigned long long)~MIP_VALID);
+	return status;
+}
+
+int
+es7000_start_cpu(int cpu, unsigned long eip)
+{
+	unsigned long vect = 0, psaival = 0;
+
+	if (psai == NULL)
+		return -1;
+
+	vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+	psaival = (0x1000000 | vect | cpu);
+
+	while (*psai & 0x1000000)
+                ;
+
+	*psai = psaival;
+
+	return 0;
+
+}
+
+int
+es7000_stop_cpu(int cpu)
+{
+	int startup;
+
+	if (psai == NULL)
+		return -1;
+
+	startup= (0x1000000 | cpu);
+
+	while ((*psai & 0xff00ffff) != startup)
+		;
+
+	startup = (*psai & 0xff0000) >> 16;
+	*psai &= 0xffffff;
+
+	return 0;
+
+}
+
+void __init
+es7000_sw_apic()
+{
+	if (es7000_plat) {
+		int mip_status;
+		struct mip_reg es7000_mip_reg;
+
+		printk("ES7000: Enabling APIC mode.\n");
+        	memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
+        	es7000_mip_reg.off_0 = MIP_SW_APIC;
+        	es7000_mip_reg.off_38 = (MIP_VALID);
+        	while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
+              		printk("es7000_sw_apic: command failed, status = %x\n",
+				mip_status);
+		return;
+	}
+}
diff --git a/arch/i386/mach-generic/Makefile b/arch/i386/mach-generic/Makefile
new file mode 100644
index 00000000000..77fbc9f64fb
--- /dev/null
+++ b/arch/i386/mach-generic/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the generic architecture
+#
+
+EXTRA_CFLAGS	+= -I../kernel
+
+obj-y				:= probe.o summit.o bigsmp.o es7000.o default.o ../mach-es7000/
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
new file mode 100644
index 00000000000..25883b44f62
--- /dev/null
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -0,0 +1,54 @@
+/* 
+ * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
+ * Drives the local APIC in "clustered mode".
+ */
+#define APIC_DEFINITION 1
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <asm/mach-bigsmp/mach_apic.h>
+#include <asm/mach-bigsmp/mach_apicdef.h>
+#include <asm/mach-bigsmp/mach_ipi.h>
+#include <asm/mach-default/mach_mpparse.h>
+
+static int dmi_bigsmp; /* can be set by dmi scanners */
+
+static __init int hp_ht_bigsmp(struct dmi_system_id *d)
+{
+#ifdef CONFIG_X86_GENERICARCH
+	printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
+	dmi_bigsmp = 1;
+#endif
+	return 0;
+}
+
+
+static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
+	{ hp_ht_bigsmp, "HP ProLiant DL760 G2", {
+		DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+		DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
+	}},
+
+	{ hp_ht_bigsmp, "HP ProLiant DL740", {
+		DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+		DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
+	 }},
+	 { }
+};
+
+
+static __init int probe_bigsmp(void)
+{ 
+	dmi_check_system(bigsmp_dmi_table);
+	return dmi_bigsmp; 
+} 
+
+struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); 
diff --git a/arch/i386/mach-generic/default.c b/arch/i386/mach-generic/default.c
new file mode 100644
index 00000000000..7da14e9a79c
--- /dev/null
+++ b/arch/i386/mach-generic/default.c
@@ -0,0 +1,27 @@
+/* 
+ * Default generic APIC driver. This handles upto 8 CPUs.
+ */
+#define APIC_DEFINITION 1
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/mach-default/mach_apicdef.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <asm/mach-default/mach_apic.h>
+#include <asm/mach-default/mach_ipi.h>
+#include <asm/mach-default/mach_mpparse.h>
+
+/* should be called last. */
+static __init int probe_default(void)
+{ 
+	return 1;
+} 
+
+struct genapic apic_default = APIC_INIT("default", probe_default); 
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
new file mode 100644
index 00000000000..48d3ec37241
--- /dev/null
+++ b/arch/i386/mach-generic/es7000.c
@@ -0,0 +1,28 @@
+/*
+ * APIC driver for the Unisys ES7000 chipset.
+ */
+#define APIC_DEFINITION 1
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <asm/mach-es7000/mach_apicdef.h>
+#include <asm/mach-es7000/mach_apic.h>
+#include <asm/mach-es7000/mach_ipi.h>
+#include <asm/mach-es7000/mach_mpparse.h>
+#include <asm/mach-es7000/mach_wakecpu.h>
+
+static __init int probe_es7000(void)
+{
+	/* probed later in mptable/ACPI hooks */
+	return 0;
+}
+
+struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
new file mode 100644
index 00000000000..5497c65a879
--- /dev/null
+++ b/arch/i386/mach-generic/probe.c
@@ -0,0 +1,102 @@
+/* Copyright 2003 Andi Kleen, SuSE Labs. 
+ * Subject to the GNU Public License, v.2 
+ * 
+ * Generic x86 APIC driver probe layer.
+ */  
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+
+extern struct genapic apic_summit;
+extern struct genapic apic_bigsmp;
+extern struct genapic apic_es7000;
+extern struct genapic apic_default;
+
+struct genapic *genapic = &apic_default;
+
+struct genapic *apic_probe[] __initdata = { 
+	&apic_summit,
+	&apic_bigsmp, 
+	&apic_es7000,
+	&apic_default,	/* must be last */
+	NULL,
+};
+
+void __init generic_apic_probe(char *command_line) 
+{ 
+	char *s;
+	int i;
+	int changed = 0;
+
+	s = strstr(command_line, "apic=");
+	if (s && (s == command_line || isspace(s[-1]))) { 
+		char *p = strchr(s, ' '), old; 
+		if (!p)
+			p = strchr(s, '\0'); 
+		old = *p; 
+		*p = 0; 
+		for (i = 0; !changed && apic_probe[i]; i++) {
+			if (!strcmp(apic_probe[i]->name, s+5)) { 
+				changed = 1;
+				genapic = apic_probe[i];
+			}
+		}
+		if (!changed)
+			printk(KERN_ERR "Unknown genapic `%s' specified.\n", s);
+		*p = old;
+	} 
+	for (i = 0; !changed && apic_probe[i]; i++) { 
+		if (apic_probe[i]->probe()) {
+			changed = 1;
+			genapic = apic_probe[i]; 
+		} 
+	}
+	/* Not visible without early console */ 
+	if (!changed) 
+		panic("Didn't find an APIC driver"); 
+
+	printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
+} 
+
+/* These functions can switch the APIC even after the initial ->probe() */
+
+int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
+{ 
+	int i;
+	for (i = 0; apic_probe[i]; ++i) { 
+		if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) { 
+			genapic = apic_probe[i];
+			printk(KERN_INFO "Switched to APIC driver `%s'.\n", 
+			       genapic->name);
+			return 1;
+		} 
+	} 
+	return 0;
+} 
+
+int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	int i;
+	for (i = 0; apic_probe[i]; ++i) { 
+		if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 
+			genapic = apic_probe[i];
+			printk(KERN_INFO "Switched to APIC driver `%s'.\n", 
+			       genapic->name);
+			return 1;
+		} 
+	} 
+	return 0;	
+}
+
+int hard_smp_processor_id(void)
+{
+	return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c
new file mode 100644
index 00000000000..65ddf74d7f2
--- /dev/null
+++ b/arch/i386/mach-generic/summit.c
@@ -0,0 +1,27 @@
+/* 
+ * APIC driver for the IBM "Summit" chipset.
+ */
+#define APIC_DEFINITION 1
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <asm/mach-summit/mach_apic.h>
+#include <asm/mach-summit/mach_apicdef.h>
+#include <asm/mach-summit/mach_ipi.h>
+#include <asm/mach-summit/mach_mpparse.h>
+
+static __init int probe_summit(void)
+{ 
+	/* probed later in mptable/ACPI hooks */
+	return 0;
+} 
+
+struct genapic apic_summit = APIC_INIT("summit", probe_summit); 
diff --git a/arch/i386/mach-visws/Makefile b/arch/i386/mach-visws/Makefile
new file mode 100644
index 00000000000..835fd96ad76
--- /dev/null
+++ b/arch/i386/mach-visws/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y				:= setup.o traps.o reboot.o
+
+obj-$(CONFIG_X86_VISWS_APIC)	+= visws_apic.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= mpparse.o
diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c
new file mode 100644
index 00000000000..5a22082147f
--- /dev/null
+++ b/arch/i386/mach-visws/mpparse.c
@@ -0,0 +1,105 @@
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/smp.h>
+#include <asm/io.h>
+
+#include "cobalt.h"
+#include "mach_apic.h"
+
+/* Have we found an MP table */
+int smp_found_config;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ 	int ver, logical_apicid;
+	physid_mask_t apic_cpus;
+ 	
+	if (!(m->mpc_cpuflag & CPU_ENABLED))
+		return;
+
+	logical_apicid = m->mpc_apicid;
+	printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n",
+		m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+		m->mpc_apicid,
+		(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+		(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+		m->mpc_apicver);
+
+	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+		boot_cpu_physical_apicid = m->mpc_apicid;
+		boot_cpu_logical_apicid = logical_apicid;
+	}
+
+	ver = m->mpc_apicver;
+	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
+		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+			m->mpc_apicid, MAX_APICS);
+		return;
+	}
+
+	apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
+	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
+	/*
+	 * Validate version
+	 */
+	if (ver == 0x0) {
+		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
+			"fixing up to 0x10. (tell your hw vendor)\n",
+			m->mpc_apicid);
+		ver = 0x10;
+	}
+	apic_version[m->mpc_apicid] = ver;
+}
+
+void __init find_smp_config(void)
+{
+	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
+
+	if (ncpus > CO_CPU_MAX) {
+		printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
+			ncpus, mp);
+
+		ncpus = CO_CPU_MAX;
+	}
+
+	if (ncpus > maxcpus)
+		ncpus = maxcpus;
+
+	smp_found_config = 1;
+	while (ncpus--)
+		MP_processor_info(mp++);
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+void __init get_smp_config (void)
+{
+}
diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c
new file mode 100644
index 00000000000..3a81e904a7b
--- /dev/null
+++ b/arch/i386/mach-visws/reboot.c
@@ -0,0 +1,51 @@
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/platform.h>
+
+#include <asm/io.h>
+#include "piix4.h"
+
+void (*pm_power_off)(void);
+
+void machine_restart(char * __unused)
+{
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
+
+	/*
+	 * Visual Workstations restart after this
+	 * register is poked on the PIIX4
+	 */
+	outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
+}
+
+EXPORT_SYMBOL(machine_restart);
+
+void machine_power_off(void)
+{
+	unsigned short pm_status;
+	extern unsigned int pci_bus0;
+
+	while ((pm_status = inw(PMSTS_PORT)) & 0x100)
+		outw(pm_status, PMSTS_PORT);
+
+	outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
+
+	mdelay(10);
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+	outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8);
+	outl(PIIX_SPECIAL_STOP, 0xCFC);
+}
+
+EXPORT_SYMBOL(machine_power_off);
+
+void machine_halt(void)
+{
+}
+
+EXPORT_SYMBOL(machine_halt);
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c
new file mode 100644
index 00000000000..9f6d2d9b1be
--- /dev/null
+++ b/arch/i386/mach-visws/setup.c
@@ -0,0 +1,134 @@
+/*
+ *  Unmaintained SGI Visual Workstation support.
+ *  Split out from setup.c by davej@suse.de
+ */
+
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include <asm/fixmap.h>
+#include <asm/arch_hooks.h>
+#include <asm/io.h>
+#include "cobalt.h"
+#include "piix4.h"
+
+char visws_board_type = -1;
+char visws_board_rev = -1;
+
+void __init visws_get_board_type_and_rev(void)
+{
+	int raw;
+
+	visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+							 >> PIIX_GPI_BD_SHIFT;
+	/*
+	 * Get Board rev.
+	 * First, we have to initialize the 307 part to allow us access
+	 * to the GPIO registers.  Let's map them at 0x0fc0 which is right
+	 * after the PIIX4 PM section.
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_GP_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_GP_MSB, SIO_DATA);	/* MSB of GPIO base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_GP_LSB, SIO_DATA);	/* LSB of GPIO base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable GPIO registers. */
+
+	/*
+	 * Now, we have to map the power management section to write
+	 * a bit which enables access to the GPIO registers.
+	 * What lunatic came up with this shit?
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_PM_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_PM_MSB, SIO_DATA);	/* MSB of PM base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_PM_LSB, SIO_DATA);	/* LSB of PM base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable PM registers. */
+
+	/*
+	 * Now, write the PM register which enables the GPIO registers.
+	 */
+	outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+	outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+	/*
+	 * Now, initialize the GPIO registers.
+	 * We want them all to be inputs which is the
+	 * power on default, so let's leave them alone.
+	 * So, let's just read the board rev!
+	 */
+	raw = inb_p(SIO_GP_DATA1);
+	raw &= 0x7f;	/* 7 bits of valid board revision ID. */
+
+	if (visws_board_type == VISWS_320) {
+		if (raw < 0x6) {
+			visws_board_rev = 4;
+		} else if (raw < 0xc) {
+			visws_board_rev = 5;
+		} else {
+			visws_board_rev = 6;
+		}
+	} else if (visws_board_type == VISWS_540) {
+			visws_board_rev = 2;
+		} else {
+			visws_board_rev = raw;
+		}
+
+	printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
+	       (visws_board_type == VISWS_320 ? "320" :
+	       (visws_board_type == VISWS_540 ? "540" :
+		"unknown")), visws_board_rev);
+}
+
+void __init pre_intr_init_hook(void)
+{
+	init_VISWS_APIC_irqs();
+}
+
+void __init intr_init_hook(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	apic_intr_init();
+#endif
+}
+
+void __init pre_setup_arch_hook()
+{
+	visws_get_board_type_and_rev();
+}
+
+static struct irqaction irq0 = {
+	.handler =	timer_interrupt,
+	.flags =	SA_INTERRUPT,
+	.name =		"timer",
+};
+
+void __init time_init_hook(void)
+{
+	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
+
+	/* Set the countdown value */
+	co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+	/* Start the timer */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+	/* Enable (unmask) the timer interrupt */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+	/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
+	setup_irq(0, &irq0);
+}
diff --git a/arch/i386/mach-visws/traps.c b/arch/i386/mach-visws/traps.c
new file mode 100644
index 00000000000..96435399203
--- /dev/null
+++ b/arch/i386/mach-visws/traps.c
@@ -0,0 +1,69 @@
+/* VISWS traps */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include <asm/io.h>
+#include <asm/arch_hooks.h>
+#include <asm/apic.h>
+#include "cobalt.h"
+#include "lithium.h"
+
+
+#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
+#define BCD (LI_INTB | LI_INTC | LI_INTD)
+#define ALLDEVS (A01234 | BCD)
+
+static __init void lithium_init(void)
+{
+	set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
+	set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
+
+	if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+	    (li_pcia_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) {
+		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
+		panic("This machine is not SGI Visual Workstation 320/540");
+	}
+
+	if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+	    (li_pcib_read16(PCI_DEVICE_ID) != PCI_VENDOR_ID_SGI_LITHIUM)) {
+		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
+		panic("This machine is not SGI Visual Workstation 320/540");
+	}
+
+	li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
+	li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
+}
+
+static __init void cobalt_init(void)
+{
+	/*
+	 * On normal SMP PC this is used only with SMP, but we have to
+	 * use it and set it up here to start the Cobalt clock
+	 */
+	set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+	setup_local_APIC();
+	printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n",
+		apic_read(APIC_LVR), apic_read(APIC_ID));
+
+	set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
+	set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
+	printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
+		co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
+
+	/* Enable Cobalt APIC being careful to NOT change the ID! */
+	co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
+
+	printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
+		co_apic_read(CO_APIC_ID));
+}
+
+void __init trap_init_hook(void)
+{
+	lithium_init();
+	cobalt_init();
+}
diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c
new file mode 100644
index 00000000000..04e6585849a
--- /dev/null
+++ b/arch/i386/mach-visws/visws_apic.c
@@ -0,0 +1,303 @@
+/*
+ *	linux/arch/i386/mach_visws/visws_apic.c
+ *
+ *	Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ *  SGI Visual Workstation interrupt controller
+ *
+ *  The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ *  which serves as the main interrupt controller in the system.  Non-legacy
+ *  hardware in the system uses this controller directly.  Legacy devices
+ *  are connected to the PIIX4 which in turn has its 8259(s) connected to
+ *  a of the Cobalt APIC entry.
+ *
+ *  09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
+ *
+ *  25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+
+#include "cobalt.h"
+#include "irq_vectors.h"
+
+
+static DEFINE_SPINLOCK(cobalt_lock);
+
+/*
+ * Set the given Cobalt APIC Redirection Table entry to point
+ * to the given IDT vector/index.
+ */
+static inline void co_apic_set(int entry, int irq)
+{
+	co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
+	co_apic_write(CO_APIC_HI(entry), 0);
+}
+
+/*
+ * Cobalt (IO)-APIC functions to handle PCI devices.
+ */
+static inline int co_apic_ide0_hack(void)
+{
+	extern char visws_board_type;
+	extern char visws_board_rev;
+
+	if (visws_board_type == VISWS_320 && visws_board_rev == 5)
+		return 5;
+	return CO_APIC_IDE0;
+}
+
+static int is_co_apic(unsigned int irq)
+{
+	if (IS_CO_APIC(irq))
+		return CO_APIC(irq);
+
+	switch (irq) {
+		case 0: return CO_APIC_CPU;
+		case CO_IRQ_IDE0: return co_apic_ide0_hack();
+		case CO_IRQ_IDE1: return CO_APIC_IDE1;
+		default: return -1;
+	}
+}
+
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+
+static void enable_cobalt_irq(unsigned int irq)
+{
+	co_apic_set(is_co_apic(irq), irq);
+}
+
+static void disable_cobalt_irq(unsigned int irq)
+{
+	int entry = is_co_apic(irq);
+
+	co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
+	co_apic_read(CO_APIC_LO(entry));
+}
+
+/*
+ * "irq" really just serves to identify the device.  Here is where we
+ * map this to the Cobalt APIC entry where it's physically wired.
+ * This is called via request_irq -> setup_irq -> irq_desc->startup()
+ */
+static unsigned int startup_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+	enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+	return 0;
+}
+
+static void ack_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	disable_cobalt_irq(irq);
+	apic_write(APIC_EOI, APIC_EIO_ACK);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static void end_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+		enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct hw_interrupt_type cobalt_irq_type = {
+	.typename =	"Cobalt-APIC",
+	.startup =	startup_cobalt_irq,
+	.shutdown =	disable_cobalt_irq,
+	.enable =	enable_cobalt_irq,
+	.disable =	disable_cobalt_irq,
+	.ack =		ack_cobalt_irq,
+	.end =		end_cobalt_irq,
+};
+
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not the manner expected by the code in i8259.c.
+ *
+ * there is a 'master' physical interrupt source that gets sent to
+ * the CPU. But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled. We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt-
+ * controller. Device drivers only see the virtual interrupt sources.
+ */
+static unsigned int startup_piix4_master_irq(unsigned int irq)
+{
+	init_8259A(0);
+
+	return startup_cobalt_irq(irq);
+}
+
+static void end_piix4_master_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct hw_interrupt_type piix4_master_irq_type = {
+	.typename =	"PIIX4-master",
+	.startup =	startup_piix4_master_irq,
+	.ack =		ack_cobalt_irq,
+	.end =		end_piix4_master_irq,
+};
+
+
+static struct hw_interrupt_type piix4_virtual_irq_type = {
+	.typename =	"PIIX4-virtual",
+	.startup =	startup_8259A_irq,
+	.shutdown =	disable_8259A_irq,
+	.enable =	enable_8259A_irq,
+	.disable =	disable_8259A_irq,
+};
+
+
+/*
+ * PIIX4-8259 master/virtual functions to handle interrupt requests
+ * from legacy devices: floppy, parallel, serial, rtc.
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries. These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts its handler figures out which of these
+ * devices is interrupting and dispatches to its handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
+ * enable_irq gets the right irq. This 'master' irq is never directly
+ * manipulated by any driver.
+ */
+static irqreturn_t piix4_master_intr(int irq, void *dev_id, struct pt_regs * regs)
+{
+	int realirq;
+	irq_desc_t *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	/* Find out what's interrupting in the PIIX4 master 8259 */
+	outb(0x0c, 0x20);		/* OCW3 Poll command */
+	realirq = inb(0x20);
+
+	/*
+	 * Bit 7 == 0 means invalid/spurious
+	 */
+	if (unlikely(!(realirq & 0x80)))
+		goto out_unlock;
+
+	realirq &= 7;
+
+	if (unlikely(realirq == 2)) {
+		outb(0x0c, 0xa0);
+		realirq = inb(0xa0);
+
+		if (unlikely(!(realirq & 0x80)))
+			goto out_unlock;
+
+		realirq = (realirq & 7) + 8;
+	}
+
+	/* mask and ack interrupt */
+	cached_irq_mask |= 1 << realirq;
+	if (unlikely(realirq > 7)) {
+		inb(0xa1);
+		outb(cached_slave_mask, 0xa1);
+		outb(0x60 + (realirq & 7), 0xa0);
+		outb(0x60 + 2, 0x20);
+	} else {
+		inb(0x21);
+		outb(cached_master_mask, 0x21);
+		outb(0x60 + realirq, 0x20);
+	}
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	desc = irq_desc + realirq;
+
+	/*
+	 * handle this 'virtual interrupt' as a Cobalt one now.
+	 */
+	kstat_cpu(smp_processor_id()).irqs[realirq]++;
+
+	if (likely(desc->action != NULL))
+		handle_IRQ_event(realirq, regs, desc->action);
+
+	if (!(desc->status & IRQ_DISABLED))
+		enable_8259A_irq(realirq);
+
+	return IRQ_HANDLED;
+
+out_unlock:
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+	return IRQ_NONE;
+}
+
+static struct irqaction master_action = {
+	.handler =	piix4_master_intr,
+	.name =		"PIIX4-8259",
+};
+
+static struct irqaction cascade_action = {
+	.handler = 	no_action,
+	.name =		"cascade",
+};
+
+
+void init_VISWS_APIC_irqs(void)
+{
+	int i;
+
+	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+		irq_desc[i].status = IRQ_DISABLED;
+		irq_desc[i].action = 0;
+		irq_desc[i].depth = 1;
+
+		if (i == 0) {
+			irq_desc[i].handler = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_IDE0) {
+			irq_desc[i].handler = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_IDE1) {
+			irq_desc[i].handler = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_8259) {
+			irq_desc[i].handler = &piix4_master_irq_type;
+		}
+		else if (i < CO_IRQ_APIC0) {
+			irq_desc[i].handler = &piix4_virtual_irq_type;
+		}
+		else if (IS_CO_APIC(i)) {
+			irq_desc[i].handler = &cobalt_irq_type;
+		}
+	}
+
+	setup_irq(CO_IRQ_8259, &master_action);
+	setup_irq(2, &cascade_action);
+}
diff --git a/arch/i386/mach-voyager/Makefile b/arch/i386/mach-voyager/Makefile
new file mode 100644
index 00000000000..f24d2965131
--- /dev/null
+++ b/arch/i386/mach-voyager/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+
+EXTRA_CFLAGS	+= -I../kernel
+obj-y			:= setup.o voyager_basic.o voyager_thread.o
+
+obj-$(CONFIG_SMP)	+= voyager_smp.o voyager_cat.o
diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c
new file mode 100644
index 00000000000..df123fc487b
--- /dev/null
+++ b/arch/i386/mach-voyager/setup.c
@@ -0,0 +1,48 @@
+/*
+ *	Machine specific setup for generic
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/arch_hooks.h>
+
+void __init pre_intr_init_hook(void)
+{
+	init_ISA_irqs();
+}
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
+
+void __init intr_init_hook(void)
+{
+#ifdef CONFIG_SMP
+	smp_intr_init();
+#endif
+
+	if (!acpi_ioapic)
+		setup_irq(2, &irq2);
+}
+
+void __init pre_setup_arch_hook(void)
+{
+	/* Voyagers run their CPUs from independent clocks, so disable
+	 * the TSC code because we can't sync them */
+	tsc_disable = 1;
+}
+
+void __init trap_init_hook(void)
+{
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
+
+void __init time_init_hook(void)
+{
+	setup_irq(0, &irq0);
+}
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
new file mode 100644
index 00000000000..602aea240e9
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -0,0 +1,325 @@
+/* Copyright (C) 1999,2001 
+ *
+ * Author: J.E.J.Bottomley@HansenPartnership.com
+ *
+ * linux/arch/i386/kernel/voyager.c
+ *
+ * This file contains all the voyager specific routines for getting
+ * initialisation of the architecture to function.  For additional
+ * features see:
+ *
+ *	voyager_cat.c - Voyager CAT bus interface
+ *	voyager_smp.c - Voyager SMP hal (emulates linux smp.c)
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <asm/io.h>
+#include <asm/voyager.h>
+#include <asm/vic.h>
+#include <linux/pm.h>
+#include <linux/irq.h>
+#include <asm/tlbflush.h>
+#include <asm/arch_hooks.h>
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+
+int voyager_level = 0;
+
+struct voyager_SUS *voyager_SUS = NULL;
+
+#ifdef CONFIG_SMP
+static void
+voyager_dump(int dummy1, struct pt_regs *dummy2, struct tty_struct *dummy3)
+{
+	/* get here via a sysrq */
+	voyager_smp_dump();
+}
+
+static struct sysrq_key_op sysrq_voyager_dump_op = {
+	.handler	= voyager_dump,
+	.help_msg	= "Voyager",
+	.action_msg	= "Dump Voyager Status",
+};
+#endif
+
+void
+voyager_detect(struct voyager_bios_info *bios)
+{
+	if(bios->len != 0xff) {
+		int class = (bios->class_1 << 8) 
+			| (bios->class_2 & 0xff);
+
+		printk("Voyager System detected.\n"
+		       "        Class %x, Revision %d.%d\n",
+		       class, bios->major, bios->minor);
+		if(class == VOYAGER_LEVEL4) 
+			voyager_level = 4;
+		else if(class < VOYAGER_LEVEL5_AND_ABOVE)
+			voyager_level = 3;
+		else
+			voyager_level = 5;
+		printk("        Architecture Level %d\n", voyager_level);
+		if(voyager_level < 4)
+			printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
+		/* install the power off handler */
+		pm_power_off = voyager_power_off;
+#ifdef CONFIG_SMP
+		register_sysrq_key('v', &sysrq_voyager_dump_op);
+#endif
+	} else {
+		printk("\n\n**WARNING**: No Voyager Subsystem Found\n");
+	}
+}
+
+void
+voyager_system_interrupt(int cpl, void *dev_id, struct pt_regs *regs)
+{
+	printk("Voyager: detected system interrupt\n");
+}
+
+/* Routine to read information from the extended CMOS area */
+__u8
+voyager_extended_cmos_read(__u16 addr)
+{
+	outb(addr & 0xff, 0x74);
+	outb((addr >> 8) & 0xff, 0x75);
+	return inb(0x76);
+}
+
+/* internal definitions for the SUS Click Map of memory */
+
+#define CLICK_ENTRIES	16
+#define CLICK_SIZE	4096	/* click to byte conversion for Length */
+
+typedef struct ClickMap {
+	struct Entry {
+		__u32	Address;
+		__u32	Length;
+	} Entry[CLICK_ENTRIES];
+} ClickMap_t;
+
+
+/* This routine is pretty much an awful hack to read the bios clickmap by
+ * mapping it into page 0.  There are usually three regions in the map:
+ * 	Base Memory
+ * 	Extended Memory
+ *	zero length marker for end of map
+ *
+ * Returns are 0 for failure and 1 for success on extracting region.
+ */
+int __init
+voyager_memory_detect(int region, __u32 *start, __u32 *length)
+{
+	int i;
+	int retval = 0;
+	__u8 cmos[4];
+	ClickMap_t *map;
+	unsigned long map_addr;
+	unsigned long old;
+
+	if(region >= CLICK_ENTRIES) {
+		printk("Voyager: Illegal ClickMap region %d\n", region);
+		return 0;
+	}
+
+	for(i = 0; i < sizeof(cmos); i++)
+		cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
+
+	map_addr = *(unsigned long *)cmos;
+
+	/* steal page 0 for this */
+	old = pg0[0];
+	pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+	local_flush_tlb();
+	/* now clear everything out but page 0 */
+	map = (ClickMap_t *)(map_addr & (~PAGE_MASK));
+
+	/* zero length is the end of the clickmap */
+	if(map->Entry[region].Length != 0) {
+		*length = map->Entry[region].Length * CLICK_SIZE;
+		*start = map->Entry[region].Address;
+		retval = 1;
+	}
+
+	/* replace the mapping */
+	pg0[0] = old;
+	local_flush_tlb();
+	return retval;
+}
+
+/* voyager specific handling code for timer interrupts.  Used to hand
+ * off the timer tick to the SMP code, since the VIC doesn't have an
+ * internal timer (The QIC does, but that's another story). */
+void
+voyager_timer_interrupt(struct pt_regs *regs)
+{
+	if((jiffies & 0x3ff) == 0) {
+
+		/* There seems to be something flaky in either
+		 * hardware or software that is resetting the timer 0
+		 * count to something much higher than it should be
+		 * This seems to occur in the boot sequence, just
+		 * before root is mounted.  Therefore, every 10
+		 * seconds or so, we sanity check the timer zero count
+		 * and kick it back to where it should be.
+		 *
+		 * FIXME: This is the most awful hack yet seen.  I
+		 * should work out exactly what is interfering with
+		 * the timer count settings early in the boot sequence
+		 * and swiftly introduce it to something sharp and
+		 * pointy.  */
+		__u16 val;
+		extern spinlock_t i8253_lock;
+
+		spin_lock(&i8253_lock);
+		
+		outb_p(0x00, 0x43);
+		val = inb_p(0x40);
+		val |= inb(0x40) << 8;
+		spin_unlock(&i8253_lock);
+
+		if(val > LATCH) {
+			printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
+			spin_lock(&i8253_lock);
+			outb(0x34,0x43);
+			outb_p(LATCH & 0xff , 0x40);	/* LSB */
+			outb(LATCH >> 8 , 0x40);	/* MSB */
+			spin_unlock(&i8253_lock);
+		}
+	}
+#ifdef CONFIG_SMP
+	smp_vic_timer_interrupt(regs);
+#endif
+}
+
+void
+voyager_power_off(void)
+{
+	printk("VOYAGER Power Off\n");
+
+	if(voyager_level == 5) {
+		voyager_cat_power_off();
+	} else if(voyager_level == 4) {
+		/* This doesn't apparently work on most L4 machines,
+		 * but the specs say to do this to get automatic power
+		 * off.  Unfortunately, if it doesn't power off the
+		 * machine, it ends up doing a cold restart, which
+		 * isn't really intended, so comment out the code */
+#if 0
+		int port;
+
+	  
+		/* enable the voyager Configuration Space */
+		outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, 
+		     VOYAGER_MC_SETUP);
+		/* the port for the power off flag is an offset from the
+		   floating base */
+		port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
+		/* set the power off flag */
+		outb(inb(port) | 0x1, port);
+#endif
+	}
+	/* and wait for it to happen */
+	for(;;) {
+		__asm("cli");
+		__asm("hlt");
+	}
+}
+
+/* copied from process.c */
+static inline void
+kb_wait(void)
+{
+	int i;
+
+	for (i=0; i<0x10000; i++)
+		if ((inb_p(0x64) & 0x02) == 0)
+			break;
+}
+
+void
+machine_restart(char *cmd)
+{
+	printk("Voyager Warm Restart\n");
+	kb_wait();
+
+	if(voyager_level == 5) {
+		/* write magic values to the RTC to inform system that
+		 * shutdown is beginning */
+		outb(0x8f, 0x70);
+		outb(0x5 , 0x71);
+		
+		udelay(50);
+		outb(0xfe,0x64);         /* pull reset low */
+	} else if(voyager_level == 4) {
+		__u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+		__u8 basebd = inb(VOYAGER_MC_SETUP);
+		
+		outb(basebd | 0x08, VOYAGER_MC_SETUP);
+		outb(0x02, catbase + 0x21);
+	}
+	for(;;) {
+		asm("cli");
+		asm("hlt");
+	}
+}
+
+EXPORT_SYMBOL(machine_restart);
+
+void
+mca_nmi_hook(void)
+{
+	__u8 dumpval __attribute__((unused)) = inb(0xf823);
+	__u8 swnmi __attribute__((unused)) = inb(0xf813);
+
+	/* FIXME: assume dump switch pressed */
+	/* check to see if the dump switch was pressed */
+	VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi));
+	/* clear swnmi */
+	outb(0xff, 0xf813);
+	/* tell SUS to ignore dump */
+	if(voyager_level == 5 && voyager_SUS != NULL) {
+		if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
+			voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
+			voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
+			udelay(1000);
+			voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP;
+			voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
+		}
+	}
+	printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id());
+	show_stack(NULL, NULL);
+	show_state();
+}
+
+
+
+void
+machine_halt(void)
+{
+	/* treat a halt like a power off */
+	machine_power_off();
+}
+
+EXPORT_SYMBOL(machine_halt);
+
+void machine_power_off(void)
+{
+	if (pm_power_off)
+		pm_power_off();
+}
+
+EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c
new file mode 100644
index 00000000000..23967fe658d
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_cat.c
@@ -0,0 +1,1178 @@
+/* -*- mode: c; c-basic-offset: 8 -*- */
+
+/* Copyright (C) 1999,2001
+ *
+ * Author: J.E.J.Bottomley@HansenPartnership.com
+ *
+ * linux/arch/i386/kernel/voyager_cat.c
+ *
+ * This file contains all the logic for manipulating the CAT bus
+ * in a level 5 machine.
+ *
+ * The CAT bus is a serial configuration and test bus.  Its primary
+ * uses are to probe the initial configuration of the system and to
+ * diagnose error conditions when a system interrupt occurs.  The low
+ * level interface is fairly primitive, so most of this file consists
+ * of bit shift manipulations to send and receive packets on the
+ * serial bus */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <asm/voyager.h>
+#include <asm/vic.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+
+#ifdef VOYAGER_CAT_DEBUG
+#define CDEBUG(x)	printk x
+#else
+#define CDEBUG(x)
+#endif
+
+/* the CAT command port */
+#define CAT_CMD		(sspb + 0xe)
+/* the CAT data port */
+#define CAT_DATA	(sspb + 0xd)
+
+/* the internal cat functions */
+static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data, 
+		     __u16 num_bits);
+static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data,
+		       __u16 num_bits);
+static void cat_build_header(__u8 *header, const __u16 len, 
+			     const __u16 smallest_reg_bits,
+			     const __u16 longest_reg_bits);
+static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp,
+			__u8 reg, __u8 op);
+static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp,
+		       __u8 reg, __u8 *value);
+static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes,
+			__u8 pad_bits);
+static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+		     __u8 value);
+static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+		    __u8 *value);
+static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp,
+		       __u16 offset, __u16 len, void *buf);
+static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
+			__u8 reg, __u8 value);
+static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp);
+static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp);
+
+static inline const char *
+cat_module_name(int module_id)
+{
+	switch(module_id) {
+	case 0x10:
+		return "Processor Slot 0";
+	case 0x11:
+		return "Processor Slot 1";
+	case 0x12:
+		return "Processor Slot 2";
+	case 0x13:
+		return "Processor Slot 4";
+	case 0x14:
+		return "Memory Slot 0";
+	case 0x15:
+		return "Memory Slot 1";
+	case 0x18:
+		return "Primary Microchannel";
+	case 0x19:
+		return "Secondary Microchannel";
+	case 0x1a:
+		return "Power Supply Interface";
+	case 0x1c:
+		return "Processor Slot 5";
+	case 0x1d:
+		return "Processor Slot 6";
+	case 0x1e:
+		return "Processor Slot 7";
+	case 0x1f:
+		return "Processor Slot 8";
+	default:
+		return "Unknown Module";
+	}
+}
+
+static int sspb = 0;		/* stores the super port location */
+int voyager_8slot = 0;		/* set to true if a 51xx monster */
+
+voyager_module_t *voyager_cat_list;
+
+/* the I/O port assignments for the VIC and QIC */
+static struct resource vic_res = {
+	"Voyager Interrupt Controller", 0xFC00, 0xFC6F };
+static struct resource qic_res = {
+	"Quad Interrupt Controller", 0xFC70, 0xFCFF };
+
+/* This function is used to pack a data bit stream inside a message.
+ * It writes num_bits of the data buffer in msg starting at start_bit.
+ * Note: This function assumes that any unused bit in the data stream
+ * is set to zero so that the ors will work correctly */
+#define BITS_PER_BYTE 8
+static void
+cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+{
+	/* compute initial shift needed */
+	const __u16 offset = start_bit % BITS_PER_BYTE;
+	__u16 len = num_bits / BITS_PER_BYTE;
+	__u16 byte = start_bit / BITS_PER_BYTE;
+	__u16 residue = (num_bits % BITS_PER_BYTE) + offset;
+	int i;
+
+	/* adjust if we have more than a byte of residue */
+	if(residue >= BITS_PER_BYTE) {
+		residue -= BITS_PER_BYTE;
+		len++;
+	}
+
+	/* clear out the bits.  We assume here that if len==0 then
+	 * residue >= offset.  This is always true for the catbus
+	 * operations */
+	msg[byte] &= 0xff << (BITS_PER_BYTE - offset); 
+	msg[byte++] |= data[0] >> offset;
+	if(len == 0)
+		return;
+	for(i = 1; i < len; i++)
+		msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset))
+			| (data[i] >> offset);
+	if(residue != 0) {
+		__u8 mask = 0xff >> residue;
+		__u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset)
+			| (data[i] >> offset);
+		
+		last_byte &= ~mask;
+		msg[byte] &= mask;
+		msg[byte] |= last_byte;
+	}
+	return;
+}
+/* unpack the data again (same arguments as cat_pack()). data buffer
+ * must be zero populated.
+ *
+ * Function: given a message string move to start_bit and copy num_bits into
+ * data (starting at bit 0 in data).
+ */
+static void
+cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+{
+	/* compute initial shift needed */
+	const __u16 offset = start_bit % BITS_PER_BYTE;
+	__u16 len = num_bits / BITS_PER_BYTE;
+	const __u8 last_bits = num_bits % BITS_PER_BYTE;
+	__u16 byte = start_bit / BITS_PER_BYTE;
+	int i;
+
+	if(last_bits != 0)
+		len++;
+
+	/* special case: want < 8 bits from msg and we can get it from
+	 * a single byte of the msg */
+	if(len == 0 && BITS_PER_BYTE - offset >= num_bits) {
+		data[0] = msg[byte] << offset;
+		data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
+		return;
+	}
+	for(i = 0; i < len; i++) {
+		/* this annoying if has to be done just in case a read of
+		 * msg one beyond the array causes a panic */
+		if(offset != 0) {
+			data[i] = msg[byte++] << offset;
+			data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
+		}
+		else {
+			data[i] = msg[byte++];
+		}
+	}
+	/* do we need to truncate the final byte */
+	if(last_bits != 0) {
+		data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits);
+	}
+	return;
+}
+
+static void
+cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits,
+		 const __u16 longest_reg_bits)
+{
+	int i;
+	__u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
+	__u8 *last_byte = &header[len - 1];
+
+	if(start_bit == 0)
+		start_bit = 1;	/* must have at least one bit in the hdr */
+	
+	for(i=0; i < len; i++)
+		header[i] = 0;
+
+	for(i = start_bit; i > 0; i--)
+		*last_byte = ((*last_byte) << 1) + 1;
+
+}
+
+static int
+cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
+{
+	__u8 parity, inst, inst_buf[4] = { 0 };
+	__u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
+	__u16 ibytes, hbytes, padbits;
+	int i;
+	
+	/* 
+	 * Parity is the parity of the register number + 1 (READ_REGISTER
+	 * and WRITE_REGISTER always add '1' to the number of bits == 1)
+	 */
+	parity = (__u8)(1 + (reg & 0x01) +
+	         ((__u8)(reg & 0x02) >> 1) +
+	         ((__u8)(reg & 0x04) >> 2) +
+	         ((__u8)(reg & 0x08) >> 3)) % 2;
+
+	inst = ((parity << 7) | (reg << 2) | op);
+
+	outb(VOYAGER_CAT_IRCYC, CAT_CMD);
+	if(!modp->scan_path_connected) {
+		if(asicp->asic_id != VOYAGER_CAT_ID) {
+			printk("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
+			return 1;
+		}
+		outb(VOYAGER_CAT_HEADER, CAT_DATA);
+		outb(inst, CAT_DATA);
+		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+			CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
+			return 1;
+		}
+		return 0;
+	}
+	ibytes = modp->inst_bits / BITS_PER_BYTE;
+	if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
+		padbits = BITS_PER_BYTE - padbits;
+		ibytes++;
+	}
+	hbytes = modp->largest_reg / BITS_PER_BYTE;
+	if(modp->largest_reg % BITS_PER_BYTE)
+		hbytes++;
+	CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
+	/* initialise the instruction sequence to 0xff */
+	for(i=0; i < ibytes + hbytes; i++)
+		iseq[i] = 0xff;
+	cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
+	cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
+	inst_buf[0] = inst;
+	inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE);
+	cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
+#ifdef VOYAGER_CAT_DEBUG
+	printk("ins = 0x%x, iseq: ", inst);
+	for(i=0; i< ibytes + hbytes; i++)
+		printk("0x%x ", iseq[i]);
+	printk("\n");
+#endif
+	if(cat_shiftout(iseq, ibytes, hbytes, padbits)) {
+		CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
+		return 1;
+	}
+	CDEBUG(("CAT SHIFTOUT DONE\n"));
+	return 0;
+}
+
+static int
+cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, 
+	    __u8 *value)
+{
+	if(!modp->scan_path_connected) {
+		if(asicp->asic_id != VOYAGER_CAT_ID) {
+			CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n"));
+			return 1;
+		}
+		if(reg > VOYAGER_SUBADDRHI) 
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+		outb(VOYAGER_CAT_DRCYC, CAT_CMD);
+		outb(VOYAGER_CAT_HEADER, CAT_DATA);
+		*value = inb(CAT_DATA);
+		outb(0xAA, CAT_DATA);
+		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+			CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
+			return 1;
+		}
+		return 0;
+	}
+	else {
+		__u16 sbits = modp->num_asics -1 + asicp->ireg_length;
+		__u16 sbytes = sbits / BITS_PER_BYTE;
+		__u16 tbytes;
+		__u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE];
+		__u8 padbits;
+		int i;
+		
+		outb(VOYAGER_CAT_DRCYC, CAT_CMD);
+
+		if((padbits = sbits % BITS_PER_BYTE) != 0) {
+			padbits = BITS_PER_BYTE - padbits;
+			sbytes++;
+		}
+		tbytes = asicp->ireg_length / BITS_PER_BYTE;
+		if(asicp->ireg_length % BITS_PER_BYTE)
+			tbytes++;
+		CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
+			tbytes,	sbytes, padbits));
+		cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
+
+		
+		for(i = tbytes - 1; i >= 0; i--) {
+			outb(trailer[i], CAT_DATA);
+			string[sbytes + i] = inb(CAT_DATA);
+		}
+
+		for(i = sbytes - 1; i >= 0; i--) {
+			outb(0xaa, CAT_DATA);
+			string[i] = inb(CAT_DATA);
+		}
+		*value = 0;
+		cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length);
+#ifdef VOYAGER_CAT_DEBUG
+		printk("value=0x%x, string: ", *value);
+		for(i=0; i< tbytes+sbytes; i++)
+			printk("0x%x ", string[i]);
+		printk("\n");
+#endif
+		
+		/* sanity check the rest of the return */
+		for(i=0; i < tbytes; i++) {
+			__u8 input = 0;
+
+			cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE);
+			if(trailer[i] != input) {
+				CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
+				return 1;
+			}
+		}
+		CDEBUG(("cat_getdata DONE\n"));
+		return 0;
+	}
+}
+
+static int
+cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
+{
+	int i;
+	
+	for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
+		outb(data[i], CAT_DATA);
+
+	for(i = header_bytes - 1; i >= 0; i--) {
+		__u8 header = 0;
+		__u8 input;
+
+		outb(data[i], CAT_DATA);
+		input = inb(CAT_DATA);
+		CDEBUG(("cat_shiftout: returned 0x%x\n", input));
+		cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
+			   &header, BITS_PER_BYTE);
+		if(input != header) {
+			CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, 
+	     __u8 reg, __u8 value)
+{
+	outb(VOYAGER_CAT_DRCYC, CAT_CMD);
+	if(!modp->scan_path_connected) {
+		if(asicp->asic_id != VOYAGER_CAT_ID) {
+			CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
+			return 1;
+		}
+		outb(VOYAGER_CAT_HEADER, CAT_DATA);
+		outb(value, CAT_DATA);
+		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+			CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
+			return 1;
+		}
+		if(reg > VOYAGER_SUBADDRHI) {
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+		}
+		
+		return 0;
+	}
+	else {
+		__u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
+		__u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE;
+		__u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], 
+			hseq[VOYAGER_MAX_REG_SIZE];
+		int i;
+
+		if((padbits = (modp->num_asics - 1 
+			       + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
+			padbits = BITS_PER_BYTE - padbits;
+			dbytes++;
+		}
+		if(asicp->ireg_length % BITS_PER_BYTE)
+			hbytes++;
+		
+		cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
+		
+		for(i = 0; i < dbytes + hbytes; i++)
+			dseq[i] = 0xff;
+		CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
+			dbytes, hbytes, padbits));
+		cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
+			 hseq, hbytes * BITS_PER_BYTE);
+		cat_pack(dseq, asicp->asic_location, &value, 
+			 asicp->ireg_length);
+#ifdef VOYAGER_CAT_DEBUG
+		printk("dseq ");
+		for(i=0; i<hbytes+dbytes; i++) {
+			printk("0x%x ", dseq[i]);
+		}
+		printk("\n");
+#endif
+		return cat_shiftout(dseq, dbytes, hbytes, padbits);
+	}
+}
+
+static int
+cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+	 __u8 value)
+{
+	if(cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
+		return 1;
+	return cat_senddata(modp, asicp, reg, value);
+}
+
+static int
+cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+	 __u8 *value)
+{
+	if(cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
+		return 1;
+	return cat_getdata(modp, asicp, reg, value);
+}
+
+static int
+cat_subaddrsetup(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+		 __u16 len)
+{
+	__u8 val;
+
+	if(len > 1) {
+		/* set auto increment */
+		__u8 newval;
+		
+		if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
+			CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
+			return 1;
+		}
+		CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val));
+		newval = val | VOYAGER_AUTO_INC;
+		if(newval != val) {
+			if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
+				CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
+				return 1;
+			}
+		}
+	}
+	if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) {
+		CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
+		return 1;
+	}
+	if(asicp->subaddr > VOYAGER_SUBADDR_LO) {
+		if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) {
+			CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
+			return 1;
+		}
+		cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
+		CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val));
+	}
+	cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
+	CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
+	return 0;
+}
+		
+static int
+cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+	    __u16 len, void *buf)
+{
+	int i, retval;
+
+	/* FIXME: need special actions for VOYAGER_CAT_ID here */
+	if(asicp->asic_id == VOYAGER_CAT_ID) {
+		CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
+		/* FIXME -- This is supposed to be handled better
+		 * There is a problem writing to the cat asic in the
+		 * PSI.  The 30us delay seems to work, though */
+		udelay(30);
+	}
+		
+	if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+		printk("cat_subwrite: cat_subaddrsetup FAILED\n");
+		return retval;
+	}
+	
+	if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
+		printk("cat_subwrite: cat_sendinst FAILED\n");
+		return 1;
+	}
+	for(i = 0; i < len; i++) {
+		if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) {
+			printk("cat_subwrite: cat_sendata element at %d FAILED\n", i);
+			return 1;
+		}
+	}
+	return 0;
+}
+static int
+cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+	    __u16 len, void *buf)
+{
+	int i, retval;
+
+	if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+		CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
+		return retval;
+	}
+
+	if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
+		CDEBUG(("cat_subread: cat_sendinst failed\n"));
+		return 1;
+	}
+	for(i = 0; i < len; i++) {
+		if(cat_getdata(modp, asicp, 0xFF,
+			       &((__u8 *)buf)[i])) {
+			CDEBUG(("cat_subread: cat_getdata element %d failed\n", i));
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
+/* buffer for storing EPROM data read in during initialisation */
+static __initdata __u8 eprom_buf[0xFFFF];
+static voyager_module_t *voyager_initial_module;
+
+/* Initialise the cat bus components.  We assume this is called by the
+ * boot cpu *after* all memory initialisation has been done (so we can
+ * use kmalloc) but before smp initialisation, so we can probe the SMP
+ * configuration and pick up necessary information.  */
+void
+voyager_cat_init(void)
+{
+	voyager_module_t **modpp = &voyager_initial_module;
+	voyager_asic_t **asicpp;
+	voyager_asic_t *qabc_asic = NULL;
+	int i, j;
+	unsigned long qic_addr = 0;
+	__u8 qabc_data[0x20];
+	__u8 num_submodules, val;
+	voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0];
+	
+	__u8 cmos[4];
+	unsigned long addr;
+	
+	/* initiallise the SUS mailbox */
+	for(i=0; i<sizeof(cmos); i++)
+		cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
+	addr = *(unsigned long *)cmos;
+	if((addr & 0xff000000) != 0xff000000) {
+		printk(KERN_ERR "Voyager failed to get SUS mailbox (addr = 0x%lx\n", addr);
+	} else {
+		static struct resource res;
+		
+		res.name = "voyager SUS";
+		res.start = addr;
+		res.end = addr+0x3ff;
+		
+		request_resource(&iomem_resource, &res);
+		voyager_SUS = (struct voyager_SUS *)
+			ioremap(addr, 0x400);
+		printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
+		       voyager_SUS->SUS_version);
+		voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
+		voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT;
+	}
+
+	/* clear the processor counts */
+	voyager_extended_vic_processors = 0;
+	voyager_quad_processors = 0;
+
+
+
+	printk("VOYAGER: beginning CAT bus probe\n");
+	/* set up the SuperSet Port Block which tells us where the
+	 * CAT communication port is */
+	sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100;
+	VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
+
+	/* now find out if were 8 slot or normal */
+	if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
+	   == EIGHT_SLOT_IDENTIFIER) {
+		voyager_8slot = 1;
+		printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n");
+	}
+
+	for(i = VOYAGER_MIN_MODULE;
+	    i <= VOYAGER_MAX_MODULE; i++) {
+		__u8 input;
+		int asic;
+		__u16 eprom_size;
+		__u16 sp_offset;
+
+		outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
+		outb(i, VOYAGER_CAT_CONFIG_PORT);
+
+		/* check the presence of the module */
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		outb(VOYAGER_CAT_IRCYC, CAT_CMD);
+		outb(VOYAGER_CAT_HEADER, CAT_DATA);
+		/* stream series of alternating 1's and 0's to stimulate
+		 * response */
+		outb(0xAA, CAT_DATA);
+		input = inb(CAT_DATA);
+		outb(VOYAGER_CAT_END, CAT_CMD);
+		if(input != VOYAGER_CAT_HEADER) {
+			continue;
+		}
+		CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
+			cat_module_name(i)));
+		*modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/
+		if(*modpp == NULL) {
+			printk("**WARNING** kmalloc failure in cat_init\n");
+			continue;
+		}
+		memset(*modpp, 0, sizeof(voyager_module_t));
+		/* need temporary asic for cat_subread.  It will be
+		 * filled in correctly later */
+		(*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/
+		if((*modpp)->asic == NULL) {
+			printk("**WARNING** kmalloc failure in cat_init\n");
+			continue;
+		}
+		memset((*modpp)->asic, 0, sizeof(voyager_asic_t));
+		(*modpp)->asic->asic_id = VOYAGER_CAT_ID;
+		(*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
+		(*modpp)->module_addr = i;
+		(*modpp)->scan_path_connected = 0;
+		if(i == VOYAGER_PSI) {
+			/* Exception leg for modules with no EEPROM */
+			printk("Module \"%s\"\n", cat_module_name(i));
+			continue;
+		}
+			       
+		CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		cat_disconnect(*modpp, (*modpp)->asic);
+		if(cat_subread(*modpp, (*modpp)->asic,
+			       VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+			       &eprom_size)) {
+			printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		if(eprom_size > sizeof(eprom_buf)) {
+			printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		outb(VOYAGER_CAT_END, CAT_CMD);
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
+		if(cat_subread(*modpp, (*modpp)->asic, 0, 
+			       eprom_size, eprom_buf)) {
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		outb(VOYAGER_CAT_END, CAT_CMD);
+		printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
+		       cat_module_name(i), eprom_hdr->version_id,
+		       *((__u32 *)eprom_hdr->tracer),  eprom_hdr->num_asics);
+		(*modpp)->ee_size = eprom_hdr->ee_size;
+		(*modpp)->num_asics = eprom_hdr->num_asics;
+		asicpp = &((*modpp)->asic);
+		sp_offset = eprom_hdr->scan_path_offset;
+		/* All we really care about are the Quad cards.  We
+                 * identify them because they are in a processor slot
+                 * and have only four asics */
+		if((i < 0x10 || (i>=0x14 && i < 0x1c) || i>0x1f)) {
+			modpp = &((*modpp)->next);
+			continue;
+		}
+		/* Now we know it's in a processor slot, does it have
+		 * a quad baseboard submodule */
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT,
+			 &num_submodules);
+		/* lowest two bits, active low */
+		num_submodules = ~(0xfc | num_submodules);
+		CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules));
+		if(num_submodules == 0) {
+			/* fill in the dyadic extended processors */
+			__u8 cpu = i & 0x07;
+
+			printk("Module \"%s\": Dyadic Processor Card\n",
+			       cat_module_name(i));
+			voyager_extended_vic_processors |= (1<<cpu);
+			cpu += 4;
+			voyager_extended_vic_processors |= (1<<cpu);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+
+		/* now we want to read the asics on the first submodule,
+		 * which should be the quad base board */
+
+		cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val);
+		CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val));
+		val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD;
+		cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
+
+		outb(VOYAGER_CAT_END, CAT_CMD);
+			 
+
+		CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		cat_disconnect(*modpp, (*modpp)->asic);
+		if(cat_subread(*modpp, (*modpp)->asic,
+			       VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+			       &eprom_size)) {
+			printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		if(eprom_size > sizeof(eprom_buf)) {
+			printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		outb(VOYAGER_CAT_END, CAT_CMD);
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
+		if(cat_subread(*modpp, (*modpp)->asic, 0, 
+			       eprom_size, eprom_buf)) {
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			continue;
+		}
+		outb(VOYAGER_CAT_END, CAT_CMD);
+		/* Now do everything for the QBB submodule 1 */
+		(*modpp)->ee_size = eprom_hdr->ee_size;
+		(*modpp)->num_asics = eprom_hdr->num_asics;
+		asicpp = &((*modpp)->asic);
+		sp_offset = eprom_hdr->scan_path_offset;
+		/* get rid of the dummy CAT asic and read the real one */
+		kfree((*modpp)->asic);
+		for(asic=0; asic < (*modpp)->num_asics; asic++) {
+			int j;
+			voyager_asic_t *asicp = *asicpp 
+				= kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
+			voyager_sp_table_t *sp_table;
+			voyager_at_t *asic_table;
+			voyager_jtt_t *jtag_table;
+
+			if(asicp == NULL) {
+				printk("**WARNING** kmalloc failure in cat_init\n");
+				continue;
+			}
+			memset(asicp, 0, sizeof(voyager_asic_t));
+			asicpp = &(asicp->next);
+			asicp->asic_location = asic;
+			sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset);
+			asicp->asic_id = sp_table->asic_id;
+			asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset);
+			for(j=0; j<4; j++)
+				asicp->jtag_id[j] = asic_table->jtag_id[j];
+			jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset);
+			asicp->ireg_length = jtag_table->ireg_len;
+			asicp->bit_location = (*modpp)->inst_bits;
+			(*modpp)->inst_bits += asicp->ireg_length;
+			if(asicp->ireg_length > (*modpp)->largest_reg)
+				(*modpp)->largest_reg = asicp->ireg_length;
+			if (asicp->ireg_length < (*modpp)->smallest_reg ||
+			    (*modpp)->smallest_reg == 0)
+				(*modpp)->smallest_reg = asicp->ireg_length;
+			CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
+				asicp->asic_id, asicp->ireg_length,
+				asicp->bit_location));
+			if(asicp->asic_id == VOYAGER_QUAD_QABC) {
+				CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
+				qabc_asic = asicp;
+			}
+			sp_offset += sizeof(voyager_sp_table_t);
+		}
+		CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n",
+			(*modpp)->inst_bits, (*modpp)->largest_reg,
+			(*modpp)->smallest_reg));
+		/* OK, now we have the QUAD ASICs set up, use them.
+		 * we need to:
+		 *
+		 * 1. Find the Memory area for the Quad CPIs.
+		 * 2. Find the Extended VIC processor
+		 * 3. Configure a second extended VIC processor (This
+		 *    cannot be done for the 51xx.
+		 * */
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		cat_connect(*modpp, (*modpp)->asic);
+		CDEBUG(("CAT CONNECTED!!\n"));
+		cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data);
+		qic_addr = qabc_data[5] << 8;
+		qic_addr = (qic_addr | qabc_data[6]) << 8;
+		qic_addr = (qic_addr | qabc_data[7]) << 8;
+		printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
+		       cat_module_name(i), qic_addr, qabc_data[8]);
+#if 0				/* plumbing fails---FIXME */
+		if((qabc_data[8] & 0xf0) == 0) {
+			/* FIXME: 32 way 8 CPU slot monster cannot be
+			 * plumbed this way---need to check for it */
+
+			printk("Plumbing second Extended Quad Processor\n");
+			/* second VIC line hardwired to Quad CPU 1 */
+			qabc_data[8] |= 0x20;
+			cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
+#ifdef VOYAGER_CAT_DEBUG
+			/* verify plumbing */
+			cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
+			if((qabc_data[8] & 0xf0) == 0) {
+				CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8]));
+			}
+#endif
+		}
+#endif
+
+		{
+			struct resource *res = kmalloc(sizeof(struct resource),GFP_KERNEL);
+			memset(res, 0, sizeof(struct resource));
+			res->name = kmalloc(128, GFP_KERNEL);
+			sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i));
+			res->start = qic_addr;
+			res->end = qic_addr + 0x3ff;
+			request_resource(&iomem_resource, res);
+		}
+
+		qic_addr = (unsigned long)ioremap(qic_addr, 0x400);
+				
+		for(j = 0; j < 4; j++) {
+			__u8 cpu;
+
+			if(voyager_8slot) {
+				/* 8 slot has a different mapping,
+				 * each slot has only one vic line, so
+				 * 1 cpu in each slot must be < 8 */
+				cpu = (i & 0x07) + j*8;
+			} else {
+				cpu = (i & 0x03) + j*4;
+			}
+			if( (qabc_data[8] & (1<<j))) {
+				voyager_extended_vic_processors |= (1<<cpu);
+			}
+			if(qabc_data[8] & (1<<(j+4)) ) {
+				/* Second SET register plumbed: Quad
+				 * card has two VIC connected CPUs.
+				 * Secondary cannot be booted as a VIC
+				 * CPU */
+				voyager_extended_vic_processors |= (1<<cpu);
+				voyager_allowed_boot_processors &= (~(1<<cpu));
+			}
+
+			voyager_quad_processors |= (1<<cpu);
+			voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
+				(qic_addr+(j<<8));
+			CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
+				(unsigned long)voyager_quad_cpi_addr[cpu]));
+		}
+		outb(VOYAGER_CAT_END, CAT_CMD);
+
+		
+		
+		*asicpp = NULL;
+		modpp = &((*modpp)->next);
+	}
+	*modpp = NULL;
+	printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors);
+	request_resource(&ioport_resource, &vic_res);
+	if(voyager_quad_processors)
+		request_resource(&ioport_resource, &qic_res);
+	/* set up the front power switch */
+}
+
+int
+voyager_cat_readb(__u8 module, __u8 asic, int reg)
+{
+	return 0;
+}
+
+static int
+cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp) 
+{
+	__u8 val;
+	int err = 0;
+
+	if(!modp->scan_path_connected)
+		return 0;
+	if(asicp->asic_id != VOYAGER_CAT_ID) {
+		CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
+		return 1;
+	}
+	err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
+	if(err) {
+		CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
+		return err;
+	}
+	val &= VOYAGER_DISCONNECT_ASIC;
+	err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
+	if(err) {
+		CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
+		return err;
+	}
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	modp->scan_path_connected = 0;
+
+	return 0;
+}
+
+static int
+cat_connect(voyager_module_t *modp, voyager_asic_t *asicp) 
+{
+	__u8 val;
+	int err = 0;
+
+	if(modp->scan_path_connected)
+		return 0;
+	if(asicp->asic_id != VOYAGER_CAT_ID) {
+		CDEBUG(("cat_connect: ASIC is not CAT\n"));
+		return 1;
+	}
+
+	err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
+	if(err) {
+		CDEBUG(("cat_connect: failed to read SCANPATH\n"));
+		return err;
+	}
+	val |= VOYAGER_CONNECT_ASIC;
+	err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
+	if(err) {
+		CDEBUG(("cat_connect: failed to write SCANPATH\n"));
+		return err;
+	}
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	modp->scan_path_connected = 1;
+
+	return 0;
+}
+
+void
+voyager_cat_power_off(void)
+{
+	/* Power the machine off by writing to the PSI over the CAT
+         * bus */
+	__u8 data;
+	voyager_module_t psi = { 0 };
+	voyager_asic_t psi_asic = { 0 };
+
+	psi.asic = &psi_asic;
+	psi.asic->asic_id = VOYAGER_CAT_ID;
+	psi.asic->subaddr = VOYAGER_SUBADDR_HI;
+	psi.module_addr = VOYAGER_PSI;
+	psi.scan_path_connected = 0;
+
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	/* Connect the PSI to the CAT Bus */
+	outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	cat_disconnect(&psi, &psi_asic);
+	/* Read the status */
+	cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	CDEBUG(("PSI STATUS 0x%x\n", data));
+	/* These two writes are power off prep and perform */
+	data = PSI_CLEAR;
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	data = PSI_POWER_DOWN;
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
+	outb(VOYAGER_CAT_END, CAT_CMD);
+}
+
+struct voyager_status voyager_status = { 0 };
+
+void
+voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
+{
+	voyager_module_t psi = { 0 };
+	voyager_asic_t psi_asic = { 0 };
+
+	psi.asic = &psi_asic;
+	psi.asic->asic_id = VOYAGER_CAT_ID;
+	psi.asic->subaddr = VOYAGER_SUBADDR_HI;
+	psi.module_addr = VOYAGER_PSI;
+	psi.scan_path_connected = 0;
+
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	/* Connect the PSI to the CAT Bus */
+	outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	cat_disconnect(&psi, &psi_asic);
+	switch(cmd) {
+	case VOYAGER_PSI_READ:
+		cat_read(&psi, &psi_asic, reg, data);
+		break;
+	case VOYAGER_PSI_WRITE:
+		cat_write(&psi, &psi_asic, reg, *data);
+		break;
+	case VOYAGER_PSI_SUBREAD:
+		cat_subread(&psi, &psi_asic, reg, 1, data);
+		break;
+	case VOYAGER_PSI_SUBWRITE:
+		cat_subwrite(&psi, &psi_asic, reg, 1, data);
+		break;
+	default:
+		printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd);
+		break;
+	}
+	outb(VOYAGER_CAT_END, CAT_CMD);
+}
+
+void
+voyager_cat_do_common_interrupt(void)
+{
+	/* This is caused either by a memory parity error or something
+	 * in the PSI */
+	__u8 data;
+	voyager_module_t psi = { 0 };
+	voyager_asic_t psi_asic = { 0 };
+	struct voyager_psi psi_reg;
+	int i;
+ re_read:
+	psi.asic = &psi_asic;
+	psi.asic->asic_id = VOYAGER_CAT_ID;
+	psi.asic->subaddr = VOYAGER_SUBADDR_HI;
+	psi.module_addr = VOYAGER_PSI;
+	psi.scan_path_connected = 0;
+
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	/* Connect the PSI to the CAT Bus */
+	outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	cat_disconnect(&psi, &psi_asic);
+	/* Read the status.  NOTE: Need to read *all* the PSI regs here
+	 * otherwise the cmn int will be reasserted */
+	for(i = 0; i < sizeof(psi_reg.regs); i++) {
+		cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]);
+	}
+	outb(VOYAGER_CAT_END, CAT_CMD);
+	if((psi_reg.regs.checkbit & 0x02) == 0) {
+		psi_reg.regs.checkbit |= 0x02;
+		cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
+		printk("VOYAGER RE-READ PSI\n");
+		goto re_read;
+	}
+	outb(VOYAGER_CAT_RUN, CAT_CMD);
+	for(i = 0; i < sizeof(psi_reg.subregs); i++) {
+		/* This looks strange, but the PSI doesn't do auto increment
+		 * correctly */
+		cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, 
+			    1, &((__u8 *)&psi_reg.subregs)[i]); 
+	}
+	outb(VOYAGER_CAT_END, CAT_CMD);
+#ifdef VOYAGER_CAT_DEBUG
+	printk("VOYAGER PSI: ");
+	for(i=0; i<sizeof(psi_reg.regs); i++)
+		printk("%02x ", ((__u8 *)&psi_reg.regs)[i]);
+	printk("\n           ");
+	for(i=0; i<sizeof(psi_reg.subregs); i++)
+		printk("%02x ", ((__u8 *)&psi_reg.subregs)[i]);
+	printk("\n");
+#endif
+	if(psi_reg.regs.intstatus & PSI_MON) {
+		/* switch off or power fail */
+
+		if(psi_reg.subregs.supply & PSI_SWITCH_OFF) {
+			if(voyager_status.switch_off) {
+				printk(KERN_ERR "Voyager front panel switch turned off again---Immediate power off!\n");
+				voyager_cat_power_off();
+				/* not reached */
+			} else {
+				printk(KERN_ERR "Voyager front panel switch turned off\n");
+				voyager_status.switch_off = 1;
+				voyager_status.request_from_kernel = 1;
+				up(&kvoyagerd_sem);
+			}
+			/* Tell the hardware we're taking care of the
+			 * shutdown, otherwise it will power the box off
+			 * within 3 seconds of the switch being pressed and,
+			 * which is much more important to us, continue to 
+			 * assert the common interrupt */
+			data = PSI_CLR_SWITCH_OFF;
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+			cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG,
+				     1, &data);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+		} else {
+
+			VDEBUG(("Voyager ac fail reg 0x%x\n",
+				psi_reg.subregs.ACfail));
+			if((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
+				/* No further update */
+				return;
+			}
+#if 0
+			/* Don't bother trying to find out who failed.
+			 * FIXME: This probably makes the code incorrect on
+			 * anything other than a 345x */
+			for(i=0; i< 5; i++) {
+				if( psi_reg.subregs.ACfail &(1<<i)) {
+					break;
+				}
+			}
+			printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
+#endif
+			/* DON'T do this: it shuts down the AC PSI 
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+			data = PSI_MASK_MASK | i;
+			cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
+				     1, &data);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			*/
+			printk(KERN_ERR "Voyager AC power failure\n");
+			outb(VOYAGER_CAT_RUN, CAT_CMD);
+			data = PSI_COLD_START;
+			cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG,
+				     1, &data);
+			outb(VOYAGER_CAT_END, CAT_CMD);
+			voyager_status.power_fail = 1;
+			voyager_status.request_from_kernel = 1;
+			up(&kvoyagerd_sem);
+		}
+		
+		
+	} else if(psi_reg.regs.intstatus & PSI_FAULT) {
+		/* Major fault! */
+		printk(KERN_ERR "Voyager PSI Detected major fault, immediate power off!\n");
+		voyager_cat_power_off();
+		/* not reached */
+	} else if(psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
+					    | PSI_CURRENT | PSI_DVM
+					    | PSI_PSCFAULT | PSI_STAT_CHG)) {
+		/* other psi fault */
+
+		printk(KERN_WARNING "Voyager PSI status 0x%x\n", data);
+		/* clear the PSI fault */
+		outb(VOYAGER_CAT_RUN, CAT_CMD);
+		cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0);
+		outb(VOYAGER_CAT_END, CAT_CMD);
+	}
+}
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
new file mode 100644
index 00000000000..903d739ca74
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -0,0 +1,1931 @@
+/* -*- mode: c; c-basic-offset: 8 -*- */
+
+/* Copyright (C) 1999,2001
+ *
+ * Author: J.E.J.Bottomley@HansenPartnership.com
+ *
+ * linux/arch/i386/kernel/voyager_smp.c
+ *
+ * This file provides all the same external entries as smp.c but uses
+ * the voyager hal to provide the functionality
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/completion.h>
+#include <asm/desc.h>
+#include <asm/voyager.h>
+#include <asm/vic.h>
+#include <asm/mtrr.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/arch_hooks.h>
+
+#include <linux/irq.h>
+
+/* TLB state -- visible externally, indexed physically */
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 };
+
+/* CPU IRQ affinity -- set to all ones initially */
+static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1]  = ~0UL };
+
+/* per CPU data structure (for /proc/cpuinfo et al), visible externally
+ * indexed physically */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* physical ID of the CPU used to boot the system */
+unsigned char boot_cpu_id;
+
+/* The memory line addresses for the Quad CPIs */
+struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned;
+
+/* The masks for the Extended VIC processors, filled in by cat_init */
+__u32 voyager_extended_vic_processors = 0;
+
+/* Masks for the extended Quad processors which cannot be VIC booted */
+__u32 voyager_allowed_boot_processors = 0;
+
+/* The mask for the Quad Processors (both extended and non-extended) */
+__u32 voyager_quad_processors = 0;
+
+/* Total count of live CPUs, used in process.c to display
+ * the CPU information and in irq.c for the per CPU irq
+ * activity count.  Finally exported by i386_ksyms.c */
+static int voyager_extended_cpus = 1;
+
+/* Have we found an SMP box - used by time.c to do the profiling
+   interrupt for timeslicing; do not set to 1 until the per CPU timer
+   interrupt is active */
+int smp_found_config = 0;
+
+/* Used for the invalidate map that's also checked in the spinlock */
+static volatile unsigned long smp_invalidate_needed;
+
+/* Bitmask of currently online CPUs - used by setup.c for
+   /proc/cpuinfo, visible externally but still physical */
+cpumask_t cpu_online_map = CPU_MASK_NONE;
+
+/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
+ * by scheduler but indexed physically */
+cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
+
+
+/* The internal functions */
+static void send_CPI(__u32 cpuset, __u8 cpi);
+static void ack_CPI(__u8 cpi);
+static int ack_QIC_CPI(__u8 cpi);
+static void ack_special_QIC_CPI(__u8 cpi);
+static void ack_VIC_CPI(__u8 cpi);
+static void send_CPI_allbutself(__u8 cpi);
+static void enable_vic_irq(unsigned int irq);
+static void disable_vic_irq(unsigned int irq);
+static unsigned int startup_vic_irq(unsigned int irq);
+static void enable_local_vic_irq(unsigned int irq);
+static void disable_local_vic_irq(unsigned int irq);
+static void before_handle_vic_irq(unsigned int irq);
+static void after_handle_vic_irq(unsigned int irq);
+static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask);
+static void ack_vic_irq(unsigned int irq);
+static void vic_enable_cpi(void);
+static void do_boot_cpu(__u8 cpuid);
+static void do_quad_bootstrap(void);
+static inline void wrapper_smp_local_timer_interrupt(struct pt_regs *);
+
+int hard_smp_processor_id(void);
+
+/* Inline functions */
+static inline void
+send_one_QIC_CPI(__u8 cpu, __u8 cpi)
+{
+	voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
+		(smp_processor_id() << 16) + cpi;
+}
+
+static inline void
+send_QIC_CPI(__u32 cpuset, __u8 cpi)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if(cpuset & (1<<cpu)) {
+#ifdef VOYAGER_DEBUG
+			if(!cpu_isset(cpu, cpu_online_map))
+				VDEBUG(("CPU%d sending cpi %d to CPU%d not in cpu_online_map\n", hard_smp_processor_id(), cpi, cpu));
+#endif
+			send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
+		}
+	}
+}
+
+static inline void
+send_one_CPI(__u8 cpu, __u8 cpi)
+{
+	if(voyager_quad_processors & (1<<cpu))
+		send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
+	else
+		send_CPI(1<<cpu, cpi);
+}
+
+static inline void
+send_CPI_allbutself(__u8 cpi)
+{
+	__u8 cpu = smp_processor_id();
+	__u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
+	send_CPI(mask, cpi);
+}
+
+static inline int
+is_cpu_quad(void)
+{
+	__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
+	return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
+}
+
+static inline int
+is_cpu_extended(void)
+{
+	__u8 cpu = hard_smp_processor_id();
+
+	return(voyager_extended_vic_processors & (1<<cpu));
+}
+
+static inline int
+is_cpu_vic_boot(void)
+{
+	__u8 cpu = hard_smp_processor_id();
+
+	return(voyager_extended_vic_processors
+	       & voyager_allowed_boot_processors & (1<<cpu));
+}
+
+
+static inline void
+ack_CPI(__u8 cpi)
+{
+	switch(cpi) {
+	case VIC_CPU_BOOT_CPI:
+		if(is_cpu_quad() && !is_cpu_vic_boot())
+			ack_QIC_CPI(cpi);
+		else
+			ack_VIC_CPI(cpi);
+		break;
+	case VIC_SYS_INT:
+	case VIC_CMN_INT: 
+		/* These are slightly strange.  Even on the Quad card,
+		 * They are vectored as VIC CPIs */
+		if(is_cpu_quad())
+			ack_special_QIC_CPI(cpi);
+		else
+			ack_VIC_CPI(cpi);
+		break;
+	default:
+		printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi);
+		break;
+	}
+}
+
+/* local variables */
+
+/* The VIC IRQ descriptors -- these look almost identical to the
+ * 8259 IRQs except that masks and things must be kept per processor
+ */
+static struct hw_interrupt_type vic_irq_type = {
+	.typename = "VIC-level",
+	.startup = startup_vic_irq,
+	.shutdown = disable_vic_irq,
+	.enable = enable_vic_irq,
+	.disable = disable_vic_irq,
+	.ack = before_handle_vic_irq,
+	.end = after_handle_vic_irq,
+	.set_affinity = set_vic_irq_affinity,
+};
+
+/* used to count up as CPUs are brought on line (starts at 0) */
+static int cpucount = 0;
+
+/* steal a page from the bottom of memory for the trampoline and
+ * squirrel its address away here.  This will be in kernel virtual
+ * space */
+static __u32 trampoline_base;
+
+/* The per cpu profile stuff - used in smp_local_timer_interrupt */
+static DEFINE_PER_CPU(int, prof_multiplier) = 1;
+static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
+static DEFINE_PER_CPU(int, prof_counter) =  1;
+
+/* the map used to check if a CPU has booted */
+static __u32 cpu_booted_map;
+
+/* the synchronize flag used to hold all secondary CPUs spinning in
+ * a tight loop until the boot sequence is ready for them */
+static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
+
+/* This is for the new dynamic CPU boot code */
+cpumask_t cpu_callin_map = CPU_MASK_NONE;
+cpumask_t cpu_callout_map = CPU_MASK_NONE;
+
+/* The per processor IRQ masks (these are usually kept in sync) */
+static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
+
+/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */
+static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
+
+/* Lock for enable/disable of VIC interrupts */
+static  __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
+
+/* The boot processor is correctly set up in PC mode when it 
+ * comes up, but the secondaries need their master/slave 8259
+ * pairs initializing correctly */
+
+/* Interrupt counters (per cpu) and total - used to try to
+ * even up the interrupt handling routines */
+static long vic_intr_total = 0;
+static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 };
+static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
+
+/* Since we can only use CPI0, we fake all the other CPIs */
+static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
+
+/* debugging routine to read the isr of the cpu's pic */
+static inline __u16
+vic_read_isr(void)
+{
+	__u16 isr;
+
+	outb(0x0b, 0xa0);
+	isr = inb(0xa0) << 8;
+	outb(0x0b, 0x20);
+	isr |= inb(0x20);
+
+	return isr;
+}
+
+static __init void
+qic_setup(void)
+{
+	if(!is_cpu_quad()) {
+		/* not a quad, no setup */
+		return;
+	}
+	outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
+	outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
+	
+	if(is_cpu_extended()) {
+		/* the QIC duplicate of the VIC base register */
+		outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
+		outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
+
+		/* FIXME: should set up the QIC timer and memory parity
+		 * error vectors here */
+	}
+}
+
+static __init void
+vic_setup_pic(void)
+{
+	outb(1, VIC_REDIRECT_REGISTER_1);
+	/* clear the claim registers for dynamic routing */
+	outb(0, VIC_CLAIM_REGISTER_0);
+	outb(0, VIC_CLAIM_REGISTER_1);
+
+	outb(0, VIC_PRIORITY_REGISTER);
+	/* Set the Primary and Secondary Microchannel vector
+	 * bases to be the same as the ordinary interrupts
+	 *
+	 * FIXME: This would be more efficient using separate
+	 * vectors. */
+	outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
+	outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
+	/* Now initiallise the master PIC belonging to this CPU by
+	 * sending the four ICWs */
+
+	/* ICW1: level triggered, ICW4 needed */
+	outb(0x19, 0x20);
+
+	/* ICW2: vector base */
+	outb(FIRST_EXTERNAL_VECTOR, 0x21);
+
+	/* ICW3: slave at line 2 */
+	outb(0x04, 0x21);
+
+	/* ICW4: 8086 mode */
+	outb(0x01, 0x21);
+
+	/* now the same for the slave PIC */
+
+	/* ICW1: level trigger, ICW4 needed */
+	outb(0x19, 0xA0);
+
+	/* ICW2: slave vector base */
+	outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
+	
+	/* ICW3: slave ID */
+	outb(0x02, 0xA1);
+
+	/* ICW4: 8086 mode */
+	outb(0x01, 0xA1);
+}
+
+static void
+do_quad_bootstrap(void)
+{
+	if(is_cpu_quad() && is_cpu_vic_boot()) {
+		int i;
+		unsigned long flags;
+		__u8 cpuid = hard_smp_processor_id();
+
+		local_irq_save(flags);
+
+		for(i = 0; i<4; i++) {
+			/* FIXME: this would be >>3 &0x7 on the 32 way */
+			if(((cpuid >> 2) & 0x03) == i)
+				/* don't lower our own mask! */
+				continue;
+
+			/* masquerade as local Quad CPU */
+			outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID);
+			/* enable the startup CPI */
+			outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1);
+			/* restore cpu id */
+			outb(0, QIC_PROCESSOR_ID);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+
+/* Set up all the basic stuff: read the SMP config and make all the
+ * SMP information reflect only the boot cpu.  All others will be
+ * brought on-line later. */
+void __init 
+find_smp_config(void)
+{
+	int i;
+
+	boot_cpu_id = hard_smp_processor_id();
+
+	printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
+
+	/* initialize the CPU structures (moved from smp_boot_cpus) */
+	for(i=0; i<NR_CPUS; i++) {
+		cpu_irq_affinity[i] = ~0;
+	}
+	cpu_online_map = cpumask_of_cpu(boot_cpu_id);
+
+	/* The boot CPU must be extended */
+	voyager_extended_vic_processors = 1<<boot_cpu_id;
+	/* initially, all of the first 8 cpu's can boot */
+	voyager_allowed_boot_processors = 0xff;
+	/* set up everything for just this CPU, we can alter
+	 * this as we start the other CPUs later */
+	/* now get the CPU disposition from the extended CMOS */
+	cpus_addr(phys_cpu_present_map)[0] = voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
+	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
+	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16;
+	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24;
+	printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]);
+	/* Here we set up the VIC to enable SMP */
+	/* enable the CPIs by writing the base vector to their register */
+	outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
+	outb(1, VIC_REDIRECT_REGISTER_1);
+	/* set the claim registers for static routing --- Boot CPU gets
+	 * all interrupts untill all other CPUs started */
+	outb(0xff, VIC_CLAIM_REGISTER_0);
+	outb(0xff, VIC_CLAIM_REGISTER_1);
+	/* Set the Primary and Secondary Microchannel vector
+	 * bases to be the same as the ordinary interrupts
+	 *
+	 * FIXME: This would be more efficient using separate
+	 * vectors. */
+	outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
+	outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
+
+	/* Finally tell the firmware that we're driving */
+	outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG,
+	     VOYAGER_SUS_IN_CONTROL_PORT);
+
+	current_thread_info()->cpu = boot_cpu_id;
+}
+
+/*
+ *	The bootstrap kernel entry code has set these up. Save them
+ *	for a given CPU, id is physical */
+void __init
+smp_store_cpu_info(int id)
+{
+	struct cpuinfo_x86 *c=&cpu_data[id];
+
+	*c = boot_cpu_data;
+
+	identify_cpu(c);
+}
+
+/* set up the trampoline and return the physical address of the code */
+static __u32 __init
+setup_trampoline(void)
+{
+	/* these two are global symbols in trampoline.S */
+	extern __u8 trampoline_end[];
+	extern __u8 trampoline_data[];
+
+	memcpy((__u8 *)trampoline_base, trampoline_data,
+	       trampoline_end - trampoline_data);
+	return virt_to_phys((__u8 *)trampoline_base);
+}
+
+/* Routine initially called when a non-boot CPU is brought online */
+static void __init
+start_secondary(void *unused)
+{
+	__u8 cpuid = hard_smp_processor_id();
+	/* external functions not defined in the headers */
+	extern void calibrate_delay(void);
+
+	cpu_init();
+
+	/* OK, we're in the routine */
+	ack_CPI(VIC_CPU_BOOT_CPI);
+
+	/* setup the 8259 master slave pair belonging to this CPU ---
+         * we won't actually receive any until the boot CPU
+         * relinquishes it's static routing mask */
+	vic_setup_pic();
+
+	qic_setup();
+
+	if(is_cpu_quad() && !is_cpu_vic_boot()) {
+		/* clear the boot CPI */
+		__u8 dummy;
+
+		dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
+		printk("read dummy %d\n", dummy);
+	}
+
+	/* lower the mask to receive CPIs */
+	vic_enable_cpi();
+
+	VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
+
+	/* enable interrupts */
+	local_irq_enable();
+
+	/* get our bogomips */
+	calibrate_delay();
+
+	/* save our processor parameters */
+	smp_store_cpu_info(cpuid);
+
+	/* if we're a quad, we may need to bootstrap other CPUs */
+	do_quad_bootstrap();
+
+	/* FIXME: this is rather a poor hack to prevent the CPU
+	 * activating softirqs while it's supposed to be waiting for
+	 * permission to proceed.  Without this, the new per CPU stuff
+	 * in the softirqs will fail */
+	local_irq_disable();
+	cpu_set(cpuid, cpu_callin_map);
+
+	/* signal that we're done */
+	cpu_booted_map = 1;
+
+	while (!cpu_isset(cpuid, smp_commenced_mask))
+		rep_nop();
+	local_irq_enable();
+
+	local_flush_tlb();
+
+	cpu_set(cpuid, cpu_online_map);
+	wmb();
+	cpu_idle();
+}
+
+
+/* Routine to kick start the given CPU and wait for it to report ready
+ * (or timeout in startup).  When this routine returns, the requested
+ * CPU is either fully running and configured or known to be dead.
+ *
+ * We call this routine sequentially 1 CPU at a time, so no need for
+ * locking */
+
+static void __init
+do_boot_cpu(__u8 cpu)
+{
+	struct task_struct *idle;
+	int timeout;
+	unsigned long flags;
+	int quad_boot = (1<<cpu) & voyager_quad_processors 
+		& ~( voyager_extended_vic_processors
+		     & voyager_allowed_boot_processors);
+
+	/* For the 486, we can't use the 4Mb page table trick, so
+	 * must map a region of memory */
+#ifdef CONFIG_M486
+	int i;
+	unsigned long *page_table_copies = (unsigned long *)
+		__get_free_page(GFP_KERNEL);
+#endif
+	pgd_t orig_swapper_pg_dir0;
+
+	/* This is an area in head.S which was used to set up the
+	 * initial kernel stack.  We need to alter this to give the
+	 * booting CPU a new stack (taken from its idle process) */
+	extern struct {
+		__u8 *esp;
+		unsigned short ss;
+	} stack_start;
+	/* This is the format of the CPI IDT gate (in real mode) which
+	 * we're hijacking to boot the CPU */
+	union 	IDTFormat {
+		struct seg {
+			__u16	Offset;
+			__u16	Segment;
+		} idt;
+		__u32 val;
+	} hijack_source;
+
+	__u32 *hijack_vector;
+	__u32 start_phys_address = setup_trampoline();
+
+	/* There's a clever trick to this: The linux trampoline is
+	 * compiled to begin at absolute location zero, so make the
+	 * address zero but have the data segment selector compensate
+	 * for the actual address */
+	hijack_source.idt.Offset = start_phys_address & 0x000F;
+	hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF;
+
+	cpucount++;
+	idle = fork_idle(cpu);
+	if(IS_ERR(idle))
+		panic("failed fork for CPU%d", cpu);
+	idle->thread.eip = (unsigned long) start_secondary;
+	/* init_tasks (in sched.c) is indexed logically */
+	stack_start.esp = (void *) idle->thread.esp;
+
+	irq_ctx_init(cpu);
+
+	/* Note: Don't modify initial ss override */
+	VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, 
+		(unsigned long)hijack_source.val, hijack_source.idt.Segment,
+		hijack_source.idt.Offset, stack_start.esp));
+	/* set the original swapper_pg_dir[0] to map 0 to 4Mb transparently
+	 * (so that the booting CPU can find start_32 */
+	orig_swapper_pg_dir0 = swapper_pg_dir[0];
+#ifdef CONFIG_M486
+	if(page_table_copies == NULL)
+		panic("No free memory for 486 page tables\n");
+	for(i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++)
+		page_table_copies[i] = (i * PAGE_SIZE) 
+			| _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
+
+	((unsigned long *)swapper_pg_dir)[0] = 
+		((virt_to_phys(page_table_copies)) & PAGE_MASK)
+		| _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
+#else
+	((unsigned long *)swapper_pg_dir)[0] = 
+		(virt_to_phys(pg0) & PAGE_MASK)
+		| _PAGE_RW | _PAGE_USER | _PAGE_PRESENT;
+#endif
+
+	if(quad_boot) {
+		printk("CPU %d: non extended Quad boot\n", cpu);
+		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4);
+		*hijack_vector = hijack_source.val;
+	} else {
+		printk("CPU%d: extended VIC boot\n", cpu);
+		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4);
+		*hijack_vector = hijack_source.val;
+		/* VIC errata, may also receive interrupt at this address */
+		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4);
+		*hijack_vector = hijack_source.val;
+	}
+	/* All non-boot CPUs start with interrupts fully masked.  Need
+	 * to lower the mask of the CPI we're about to send.  We do
+	 * this in the VIC by masquerading as the processor we're
+	 * about to boot and lowering its interrupt mask */
+	local_irq_save(flags);
+	if(quad_boot) {
+		send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
+	} else {
+		outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
+		/* here we're altering registers belonging to `cpu' */
+		
+		outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
+		/* now go back to our original identity */
+		outb(boot_cpu_id, VIC_PROCESSOR_ID);
+
+		/* and boot the CPU */
+
+		send_CPI((1<<cpu), VIC_CPU_BOOT_CPI);
+	}
+	cpu_booted_map = 0;
+	local_irq_restore(flags);
+
+	/* now wait for it to become ready (or timeout) */
+	for(timeout = 0; timeout < 50000; timeout++) {
+		if(cpu_booted_map)
+			break;
+		udelay(100);
+	}
+	/* reset the page table */
+	swapper_pg_dir[0] = orig_swapper_pg_dir0;
+	local_flush_tlb();
+#ifdef CONFIG_M486
+	free_page((unsigned long)page_table_copies);
+#endif
+	  
+	if (cpu_booted_map) {
+		VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
+			cpu, smp_processor_id()));
+	
+		printk("CPU%d: ", cpu);
+		print_cpu_info(&cpu_data[cpu]);
+		wmb();
+		cpu_set(cpu, cpu_callout_map);
+	}
+	else {
+		printk("CPU%d FAILED TO BOOT: ", cpu);
+		if (*((volatile unsigned char *)phys_to_virt(start_phys_address))==0xA5)
+			printk("Stuck.\n");
+		else
+			printk("Not responding.\n");
+		
+		cpucount--;
+	}
+}
+
+void __init
+smp_boot_cpus(void)
+{
+	int i;
+
+	/* CAT BUS initialisation must be done after the memory */
+	/* FIXME: The L4 has a catbus too, it just needs to be
+	 * accessed in a totally different way */
+	if(voyager_level == 5) {
+		voyager_cat_init();
+
+		/* now that the cat has probed the Voyager System Bus, sanity
+		 * check the cpu map */
+		if( ((voyager_quad_processors | voyager_extended_vic_processors)
+		     & cpus_addr(phys_cpu_present_map)[0]) != cpus_addr(phys_cpu_present_map)[0]) {
+			/* should panic */
+			printk("\n\n***WARNING*** Sanity check of CPU present map FAILED\n");
+		}
+	} else if(voyager_level == 4)
+		voyager_extended_vic_processors = cpus_addr(phys_cpu_present_map)[0];
+
+	/* this sets up the idle task to run on the current cpu */
+	voyager_extended_cpus = 1;
+	/* Remove the global_irq_holder setting, it triggers a BUG() on
+	 * schedule at the moment */
+	//global_irq_holder = boot_cpu_id;
+
+	/* FIXME: Need to do something about this but currently only works
+	 * on CPUs with a tsc which none of mine have. 
+	smp_tune_scheduling();
+	 */
+	smp_store_cpu_info(boot_cpu_id);
+	printk("CPU%d: ", boot_cpu_id);
+	print_cpu_info(&cpu_data[boot_cpu_id]);
+
+	if(is_cpu_quad()) {
+		/* booting on a Quad CPU */
+		printk("VOYAGER SMP: Boot CPU is Quad\n");
+		qic_setup();
+		do_quad_bootstrap();
+	}
+
+	/* enable our own CPIs */
+	vic_enable_cpi();
+
+	cpu_set(boot_cpu_id, cpu_online_map);
+	cpu_set(boot_cpu_id, cpu_callout_map);
+	
+	/* loop over all the extended VIC CPUs and boot them.  The 
+	 * Quad CPUs must be bootstrapped by their extended VIC cpu */
+	for(i = 0; i < NR_CPUS; i++) {
+		if(i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
+			continue;
+		do_boot_cpu(i);
+		/* This udelay seems to be needed for the Quad boots
+		 * don't remove unless you know what you're doing */
+		udelay(1000);
+	}
+	/* we could compute the total bogomips here, but why bother?,
+	 * Code added from smpboot.c */
+	{
+		unsigned long bogosum = 0;
+		for (i = 0; i < NR_CPUS; i++)
+			if (cpu_isset(i, cpu_online_map))
+				bogosum += cpu_data[i].loops_per_jiffy;
+		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+			cpucount+1,
+			bogosum/(500000/HZ),
+			(bogosum/(5000/HZ))%100);
+	}
+	voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
+	printk("VOYAGER: Extended (interrupt handling CPUs): %d, non-extended: %d\n", voyager_extended_cpus, num_booting_cpus() - voyager_extended_cpus);
+	/* that's it, switch to symmetric mode */
+	outb(0, VIC_PRIORITY_REGISTER);
+	outb(0, VIC_CLAIM_REGISTER_0);
+	outb(0, VIC_CLAIM_REGISTER_1);
+	
+	VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
+}
+
+/* Reload the secondary CPUs task structure (this function does not
+ * return ) */
+void __init 
+initialize_secondary(void)
+{
+#if 0
+	// AC kernels only
+	set_current(hard_get_current());
+#endif
+
+	/*
+	 * We don't actually need to load the full TSS,
+	 * basically just the stack pointer and the eip.
+	 */
+
+	asm volatile(
+		"movl %0,%%esp\n\t"
+		"jmp *%1"
+		:
+		:"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+/* handle a Voyager SYS_INT -- If we don't, the base board will
+ * panic the system.
+ *
+ * System interrupts occur because some problem was detected on the
+ * various busses.  To find out what you have to probe all the
+ * hardware via the CAT bus.  FIXME: At the moment we do nothing. */
+fastcall void
+smp_vic_sys_interrupt(struct pt_regs *regs)
+{
+	ack_CPI(VIC_SYS_INT);
+	printk("Voyager SYSTEM INTERRUPT\n");
+}
+
+/* Handle a voyager CMN_INT; These interrupts occur either because of
+ * a system status change or because a single bit memory error
+ * occurred.  FIXME: At the moment, ignore all this. */
+fastcall void
+smp_vic_cmn_interrupt(struct pt_regs *regs)
+{
+	static __u8 in_cmn_int = 0;
+	static DEFINE_SPINLOCK(cmn_int_lock);
+
+	/* common ints are broadcast, so make sure we only do this once */
+	_raw_spin_lock(&cmn_int_lock);
+	if(in_cmn_int)
+		goto unlock_end;
+
+	in_cmn_int++;
+	_raw_spin_unlock(&cmn_int_lock);
+
+	VDEBUG(("Voyager COMMON INTERRUPT\n"));
+
+	if(voyager_level == 5)
+		voyager_cat_do_common_interrupt();
+
+	_raw_spin_lock(&cmn_int_lock);
+	in_cmn_int = 0;
+ unlock_end:
+	_raw_spin_unlock(&cmn_int_lock);
+	ack_CPI(VIC_CMN_INT);
+}
+
+/*
+ * Reschedule call back. Nothing to do, all the work is done
+ * automatically when we return from the interrupt.  */
+static void
+smp_reschedule_interrupt(void)
+{
+	/* do nothing */
+}
+
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL	0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context, 
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+static inline void
+leave_mm (unsigned long cpu)
+{
+	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+		BUG();
+	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+	load_cr3(swapper_pg_dir);
+}
+
+
+/*
+ * Invalidate call-back
+ */
+static void 
+smp_invalidate_interrupt(void)
+{
+	__u8 cpu = smp_processor_id();
+
+	if (!test_bit(cpu, &smp_invalidate_needed))
+		return;
+	/* This will flood messages.  Don't uncomment unless you see
+	 * Problems with cross cpu invalidation
+	VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
+		smp_processor_id()));
+	*/
+
+	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+			if (flush_va == FLUSH_ALL)
+				local_flush_tlb();
+			else
+				__flush_tlb_one(flush_va);
+		} else
+			leave_mm(cpu);
+	}
+	smp_mb__before_clear_bit();
+	clear_bit(cpu, &smp_invalidate_needed);
+	smp_mb__after_clear_bit();
+}
+
+/* All the new flush operations for 2.4 */
+
+
+/* This routine is called with a physical cpu mask */
+static void
+flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+						unsigned long va)
+{
+	int stuck = 50000;
+
+	if (!cpumask)
+		BUG();
+	if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask)
+		BUG();
+	if (cpumask & (1 << smp_processor_id()))
+		BUG();
+	if (!mm)
+		BUG();
+
+	spin_lock(&tlbstate_lock);
+	
+	flush_mm = mm;
+	flush_va = va;
+	atomic_set_mask(cpumask, &smp_invalidate_needed);
+	/*
+	 * We have to send the CPI only to
+	 * CPUs affected.
+	 */
+	send_CPI(cpumask, VIC_INVALIDATE_CPI);
+
+	while (smp_invalidate_needed) {
+		mb();
+		if(--stuck == 0) {
+			printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id());
+			break;
+		}
+	}
+
+	/* Uncomment only to debug invalidation problems
+	VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
+	*/
+
+	flush_mm = NULL;
+	flush_va = 0;
+	spin_unlock(&tlbstate_lock);
+}
+
+void
+flush_tlb_current_task(void)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long cpu_mask;
+
+	preempt_disable();
+
+	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
+	local_flush_tlb();
+	if (cpu_mask)
+		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+	preempt_enable();
+}
+
+
+void
+flush_tlb_mm (struct mm_struct * mm)
+{
+	unsigned long cpu_mask;
+
+	preempt_disable();
+
+	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
+
+	if (current->active_mm == mm) {
+		if (current->mm)
+			local_flush_tlb();
+		else
+			leave_mm(smp_processor_id());
+	}
+	if (cpu_mask)
+		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+	preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long cpu_mask;
+
+	preempt_disable();
+
+	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
+	if (current->active_mm == mm) {
+		if(current->mm)
+			__flush_tlb_one(va);
+		 else
+		 	leave_mm(smp_processor_id());
+	}
+
+	if (cpu_mask)
+		flush_tlb_others(cpu_mask, mm, va);
+
+	preempt_enable();
+}
+
+/* enable the requested IRQs */
+static void
+smp_enable_irq_interrupt(void)
+{
+	__u8 irq;
+	__u8 cpu = get_cpu();
+
+	VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
+	       vic_irq_enable_mask[cpu]));
+
+	spin_lock(&vic_irq_lock);
+	for(irq = 0; irq < 16; irq++) {
+		if(vic_irq_enable_mask[cpu] & (1<<irq))
+			enable_local_vic_irq(irq);
+	}
+	vic_irq_enable_mask[cpu] = 0;
+	spin_unlock(&vic_irq_lock);
+
+	put_cpu_no_resched();
+}
+	
+/*
+ *	CPU halt call-back
+ */
+static void
+smp_stop_cpu_function(void *dummy)
+{
+	VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
+	cpu_clear(smp_processor_id(), cpu_online_map);
+	local_irq_disable();
+	for(;;)
+	       __asm__("hlt");
+}
+
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+	void (*func) (void *info);
+	void *info;
+	volatile unsigned long started;
+	volatile unsigned long finished;
+	int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/* execute a thread on a new CPU.  The function to be called must be
+ * previously set up.  This is used to schedule a function for
+ * execution on all CPU's - set up the function then broadcast a
+ * function_interrupt CPI to come here on each CPU */
+static void
+smp_call_function_interrupt(void)
+{
+	void (*func) (void *info) = call_data->func;
+	void *info = call_data->info;
+	/* must take copy of wait because call_data may be replaced
+	 * unless the function is waiting for us to finish */
+	int wait = call_data->wait;
+	__u8 cpu = smp_processor_id();
+
+	/*
+	 * Notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	if(!test_and_clear_bit(cpu, &call_data->started)) {
+		/* If the bit wasn't set, this could be a replay */
+		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion with no call pending\n", cpu);
+		return;
+	}
+	/*
+	 * At this point the info structure may be out of scope unless wait==1
+	 */
+	irq_enter();
+	(*func)(info);
+	irq_exit();
+	if (wait) {
+		mb();
+		clear_bit(cpu, &call_data->finished);
+	}
+}
+
+/* Call this function on all CPUs using the function_interrupt above 
+    <func> The function to run. This must be fast and non-blocking.
+    <info> An arbitrary pointer to pass to the function.
+    <retry> If true, keep retrying until ready.
+    <wait> If true, wait until function has completed on other CPUs.
+    [RETURNS] 0 on success, else a negative status code. Does not return until
+    remote CPUs are nearly ready to execute <<func>> or are or have executed.
+*/
+int
+smp_call_function (void (*func) (void *info), void *info, int retry,
+		   int wait)
+{
+	struct call_data_struct data;
+	__u32 mask = cpus_addr(cpu_online_map)[0];
+
+	mask &= ~(1<<smp_processor_id());
+
+	if (!mask)
+		return 0;
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	data.func = func;
+	data.info = info;
+	data.started = mask;
+	data.wait = wait;
+	if (wait)
+		data.finished = mask;
+
+	spin_lock(&call_lock);
+	call_data = &data;
+	wmb();
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_CPI_allbutself(VIC_CALL_FUNCTION_CPI);
+
+	/* Wait for response */
+	while (data.started)
+		barrier();
+
+	if (wait)
+		while (data.finished)
+			barrier();
+
+	spin_unlock(&call_lock);
+
+	return 0;
+}
+
+/* Sorry about the name.  In an APIC based system, the APICs
+ * themselves are programmed to send a timer interrupt.  This is used
+ * by linux to reschedule the processor.  Voyager doesn't have this,
+ * so we use the system clock to interrupt one processor, which in
+ * turn, broadcasts a timer CPI to all the others --- we receive that
+ * CPI here.  We don't use this actually for counting so losing
+ * ticks doesn't matter 
+ *
+ * FIXME: For those CPU's which actually have a local APIC, we could
+ * try to use it to trigger this interrupt instead of having to
+ * broadcast the timer tick.  Unfortunately, all my pentium DYADs have
+ * no local APIC, so I can't do this
+ *
+ * This function is currently a placeholder and is unused in the code */
+fastcall void 
+smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	wrapper_smp_local_timer_interrupt(regs);
+}
+
+/* All of the QUAD interrupt GATES */
+fastcall void
+smp_qic_timer_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_TIMER_CPI);
+	wrapper_smp_local_timer_interrupt(regs);
+}
+
+fastcall void
+smp_qic_invalidate_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_INVALIDATE_CPI);
+	smp_invalidate_interrupt();
+}
+
+fastcall void
+smp_qic_reschedule_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_RESCHEDULE_CPI);
+	smp_reschedule_interrupt();
+}
+
+fastcall void
+smp_qic_enable_irq_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
+	smp_enable_irq_interrupt();
+}
+
+fastcall void
+smp_qic_call_function_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
+	smp_call_function_interrupt();
+}
+
+fastcall void
+smp_vic_cpi_interrupt(struct pt_regs *regs)
+{
+	__u8 cpu = smp_processor_id();
+
+	if(is_cpu_quad())
+		ack_QIC_CPI(VIC_CPI_LEVEL0);
+	else
+		ack_VIC_CPI(VIC_CPI_LEVEL0);
+
+	if(test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
+		wrapper_smp_local_timer_interrupt(regs);
+	if(test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
+		smp_invalidate_interrupt();
+	if(test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
+		smp_reschedule_interrupt();
+	if(test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
+		smp_enable_irq_interrupt();
+	if(test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
+		smp_call_function_interrupt();
+}
+
+static void
+do_flush_tlb_all(void* info)
+{
+	unsigned long cpu = smp_processor_id();
+
+	__flush_tlb_all();
+	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+		leave_mm(cpu);
+}
+
+
+/* flush the TLB of every active CPU in the system */
+void
+flush_tlb_all(void)
+{
+	on_each_cpu(do_flush_tlb_all, 0, 1, 1);
+}
+
+/* used to set up the trampoline for other CPUs when the memory manager
+ * is sorted out */
+void __init
+smp_alloc_memory(void)
+{
+	trampoline_base = (__u32)alloc_bootmem_low_pages(PAGE_SIZE);
+	if(__pa(trampoline_base) >= 0x93000)
+		BUG();
+}
+
+/* send a reschedule CPI to one CPU by physical CPU number*/
+void
+smp_send_reschedule(int cpu)
+{
+	send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
+}
+
+
+int
+hard_smp_processor_id(void)
+{
+	__u8 i;
+	__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
+	if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
+		return cpumask & 0x1F;
+
+	for(i = 0; i < 8; i++) {
+		if(cpumask & (1<<i))
+			return i;
+	}
+	printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
+	return 0;
+}
+
+/* broadcast a halt to all other CPUs */
+void
+smp_send_stop(void)
+{
+	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
+}
+
+/* this function is triggered in time.c when a clock tick fires
+ * we need to re-broadcast the tick to all CPUs */
+void
+smp_vic_timer_interrupt(struct pt_regs *regs)
+{
+	send_CPI_allbutself(VIC_TIMER_CPI);
+	smp_local_timer_interrupt(regs);
+}
+
+static inline void
+wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	smp_local_timer_interrupt(regs);
+	irq_exit();
+}
+
+/* local (per CPU) timer interrupt.  It does both profiling and
+ * process statistics/rescheduling.
+ *
+ * We do profiling in every local tick, statistics/rescheduling
+ * happen only every 'profiling multiplier' ticks. The default
+ * multiplier is 1 and it can be changed by writing the new multiplier
+ * value into /proc/profile.
+ */
+void
+smp_local_timer_interrupt(struct pt_regs * regs)
+{
+	int cpu = smp_processor_id();
+	long weight;
+
+	profile_tick(CPU_PROFILING, regs);
+	if (--per_cpu(prof_counter, cpu) <= 0) {
+		/*
+		 * The multiplier may have changed since the last time we got
+		 * to this point as a result of the user writing to
+		 * /proc/profile. In this case we need to adjust the APIC
+		 * timer accordingly.
+		 *
+		 * Interrupts are already masked off at this point.
+		 */
+		per_cpu(prof_counter,cpu) = per_cpu(prof_multiplier, cpu);
+		if (per_cpu(prof_counter, cpu) !=
+					per_cpu(prof_old_multiplier, cpu)) {
+			/* FIXME: need to update the vic timer tick here */
+			per_cpu(prof_old_multiplier, cpu) =
+						per_cpu(prof_counter, cpu);
+		}
+
+		update_process_times(user_mode(regs));
+	}
+
+	if( ((1<<cpu) & voyager_extended_vic_processors) == 0)
+		/* only extended VIC processors participate in
+		 * interrupt distribution */
+		return;
+
+	/*
+	 * We take the 'long' return path, and there every subsystem
+	 * grabs the apropriate locks (kernel lock/ irq lock).
+	 *
+	 * we might want to decouple profiling from the 'long path',
+	 * and do the profiling totally in assembly.
+	 *
+	 * Currently this isn't too much of an issue (performance wise),
+	 * we can take more than 100K local irqs per second on a 100 MHz P5.
+	 */
+
+	if((++vic_tick[cpu] & 0x7) != 0)
+		return;
+	/* get here every 16 ticks (about every 1/6 of a second) */
+
+	/* Change our priority to give someone else a chance at getting
+         * the IRQ. The algorithm goes like this:
+	 *
+	 * In the VIC, the dynamically routed interrupt is always
+	 * handled by the lowest priority eligible (i.e. receiving
+	 * interrupts) CPU.  If >1 eligible CPUs are equal lowest, the
+	 * lowest processor number gets it.
+	 *
+	 * The priority of a CPU is controlled by a special per-CPU
+	 * VIC priority register which is 3 bits wide 0 being lowest
+	 * and 7 highest priority..
+	 *
+	 * Therefore we subtract the average number of interrupts from
+	 * the number we've fielded.  If this number is negative, we
+	 * lower the activity count and if it is positive, we raise
+	 * it.
+	 *
+	 * I'm afraid this still leads to odd looking interrupt counts:
+	 * the totals are all roughly equal, but the individual ones
+	 * look rather skewed.
+	 *
+	 * FIXME: This algorithm is total crap when mixed with SMP
+	 * affinity code since we now try to even up the interrupt
+	 * counts when an affinity binding is keeping them on a
+	 * particular CPU*/
+	weight = (vic_intr_count[cpu]*voyager_extended_cpus
+		  - vic_intr_total) >> 4;
+	weight += 4;
+	if(weight > 7)
+		weight = 7;
+	if(weight < 0)
+		weight = 0;
+	
+	outb((__u8)weight, VIC_PRIORITY_REGISTER);
+
+#ifdef VOYAGER_DEBUG
+	if((vic_tick[cpu] & 0xFFF) == 0) {
+		/* print this message roughly every 25 secs */
+		printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
+		       cpu, vic_tick[cpu], weight);
+	}
+#endif
+}
+
+/* setup the profiling timer */
+int 
+setup_profiling_timer(unsigned int multiplier)
+{
+	int i;
+
+	if ( (!multiplier))
+		return -EINVAL;
+
+	/* 
+	 * Set the new multiplier for each CPU. CPUs don't start using the
+	 * new values until the next timer interrupt in which they do process
+	 * accounting.
+	 */
+	for (i = 0; i < NR_CPUS; ++i)
+		per_cpu(prof_multiplier, i) = multiplier;
+
+	return 0;
+}
+
+
+/*  The CPIs are handled in the per cpu 8259s, so they must be
+ *  enabled to be received: FIX: enabling the CPIs in the early
+ *  boot sequence interferes with bug checking; enable them later
+ *  on in smp_init */
+#define VIC_SET_GATE(cpi, vector) \
+	set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector))
+#define QIC_SET_GATE(cpi, vector) \
+	set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
+
+void __init
+smp_intr_init(void)
+{
+	int i;
+
+	/* initialize the per cpu irq mask to all disabled */
+	for(i = 0; i < NR_CPUS; i++)
+		vic_irq_mask[i] = 0xFFFF;
+
+	VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
+
+	VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt);
+	VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt);
+
+	QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt);
+	QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt);
+	QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
+	QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
+	QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
+	
+
+	/* now put the VIC descriptor into the first 48 IRQs 
+	 *
+	 * This is for later: first 16 correspond to PC IRQs; next 16
+	 * are Primary MC IRQs and final 16 are Secondary MC IRQs */
+	for(i = 0; i < 48; i++)
+		irq_desc[i].handler = &vic_irq_type;
+}
+
+/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
+ * processor to receive CPI */
+static void
+send_CPI(__u32 cpuset, __u8 cpi)
+{
+	int cpu;
+	__u32 quad_cpuset = (cpuset & voyager_quad_processors);
+
+	if(cpi < VIC_START_FAKE_CPI) {
+		/* fake CPI are only used for booting, so send to the 
+		 * extended quads as well---Quads must be VIC booted */
+		outb((__u8)(cpuset), VIC_CPI_Registers[cpi]);
+		return;
+	}
+	if(quad_cpuset)
+		send_QIC_CPI(quad_cpuset, cpi);
+	cpuset &= ~quad_cpuset;
+	cpuset &= 0xff;		/* only first 8 CPUs vaild for VIC CPI */
+	if(cpuset == 0)
+		return;
+	for_each_online_cpu(cpu) {
+		if(cpuset & (1<<cpu))
+			set_bit(cpi, &vic_cpi_mailbox[cpu]);
+	}
+	if(cpuset)
+		outb((__u8)cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
+}
+
+/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
+ * set the cache line to shared by reading it.
+ *
+ * DON'T make this inline otherwise the cache line read will be
+ * optimised away
+ * */
+static int
+ack_QIC_CPI(__u8 cpi) {
+	__u8 cpu = hard_smp_processor_id();
+
+	cpi &= 7;
+
+	outb(1<<cpi, QIC_INTERRUPT_CLEAR1);
+	return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
+}
+
+static void
+ack_special_QIC_CPI(__u8 cpi)
+{
+	switch(cpi) {
+	case VIC_CMN_INT:
+		outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
+		break;
+	case VIC_SYS_INT:
+		outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0);
+		break;
+	}
+	/* also clear at the VIC, just in case (nop for non-extended proc) */
+	ack_VIC_CPI(cpi);
+}
+
+/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
+static void
+ack_VIC_CPI(__u8 cpi)
+{
+#ifdef VOYAGER_DEBUG
+	unsigned long flags;
+	__u16 isr;
+	__u8 cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	isr = vic_read_isr();
+	if((isr & (1<<(cpi &7))) == 0) {
+		printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
+	}
+#endif
+	/* send specific EOI; the two system interrupts have
+	 * bit 4 set for a separate vector but behave as the
+	 * corresponding 3 bit intr */
+	outb_p(0x60|(cpi & 7),0x20);
+
+#ifdef VOYAGER_DEBUG
+	if((vic_read_isr() & (1<<(cpi &7))) != 0) {
+		printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
+	}
+	local_irq_restore(flags);
+#endif
+}
+
+/* cribbed with thanks from irq.c */
+#define __byte(x,y) 	(((unsigned char *)&(y))[x])
+#define cached_21(cpu)	(__byte(0,vic_irq_mask[cpu]))
+#define cached_A1(cpu)	(__byte(1,vic_irq_mask[cpu]))
+
+static unsigned int
+startup_vic_irq(unsigned int irq)
+{
+	enable_vic_irq(irq);
+
+	return 0;
+}
+
+/* The enable and disable routines.  This is where we run into
+ * conflicting architectural philosophy.  Fundamentally, the voyager
+ * architecture does not expect to have to disable interrupts globally
+ * (the IRQ controllers belong to each CPU).  The processor masquerade
+ * which is used to start the system shouldn't be used in a running OS
+ * since it will cause great confusion if two separate CPUs drive to
+ * the same IRQ controller (I know, I've tried it).
+ *
+ * The solution is a variant on the NCR lazy SPL design:
+ *
+ * 1) To disable an interrupt, do nothing (other than set the
+ *    IRQ_DISABLED flag).  This dares the interrupt actually to arrive.
+ *
+ * 2) If the interrupt dares to come in, raise the local mask against
+ *    it (this will result in all the CPU masks being raised
+ *    eventually).
+ *
+ * 3) To enable the interrupt, lower the mask on the local CPU and
+ *    broadcast an Interrupt enable CPI which causes all other CPUs to
+ *    adjust their masks accordingly.  */
+
+static void
+enable_vic_irq(unsigned int irq)
+{
+	/* linux doesn't to processor-irq affinity, so enable on
+	 * all CPUs we know about */
+	int cpu = smp_processor_id(), real_cpu;
+	__u16 mask = (1<<irq);
+	__u32 processorList = 0;
+	unsigned long flags;
+
+	VDEBUG(("VOYAGER: enable_vic_irq(%d) CPU%d affinity 0x%lx\n",
+		irq, cpu, cpu_irq_affinity[cpu]));
+	spin_lock_irqsave(&vic_irq_lock, flags);
+	for_each_online_cpu(real_cpu) {
+		if(!(voyager_extended_vic_processors & (1<<real_cpu)))
+			continue;
+		if(!(cpu_irq_affinity[real_cpu] & mask)) {
+			/* irq has no affinity for this CPU, ignore */
+			continue;
+		}
+		if(real_cpu == cpu) {
+			enable_local_vic_irq(irq);
+		}
+		else if(vic_irq_mask[real_cpu] & mask) {
+			vic_irq_enable_mask[real_cpu] |= mask;
+			processorList |= (1<<real_cpu);
+		}
+	}
+	spin_unlock_irqrestore(&vic_irq_lock, flags);
+	if(processorList)
+		send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
+}
+
+static void
+disable_vic_irq(unsigned int irq)
+{
+	/* lazy disable, do nothing */
+}
+
+static void
+enable_local_vic_irq(unsigned int irq)
+{
+	__u8 cpu = smp_processor_id();
+	__u16 mask = ~(1 << irq);
+	__u16 old_mask = vic_irq_mask[cpu];
+
+	vic_irq_mask[cpu] &= mask;
+	if(vic_irq_mask[cpu] == old_mask)
+		return;
+
+	VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
+		irq, cpu));
+
+	if (irq & 8) {
+		outb_p(cached_A1(cpu),0xA1);
+		(void)inb_p(0xA1);
+	}
+	else {
+		outb_p(cached_21(cpu),0x21);
+		(void)inb_p(0x21);
+	}
+}
+
+static void
+disable_local_vic_irq(unsigned int irq)
+{
+	__u8 cpu = smp_processor_id();
+	__u16 mask = (1 << irq);
+	__u16 old_mask = vic_irq_mask[cpu];
+
+	if(irq == 7)
+		return;
+
+	vic_irq_mask[cpu] |= mask;
+	if(old_mask == vic_irq_mask[cpu])
+		return;
+
+	VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
+		irq, cpu));
+
+	if (irq & 8) {
+		outb_p(cached_A1(cpu),0xA1);
+		(void)inb_p(0xA1);
+	}
+	else {
+		outb_p(cached_21(cpu),0x21);
+		(void)inb_p(0x21);
+	}
+}
+
+/* The VIC is level triggered, so the ack can only be issued after the
+ * interrupt completes.  However, we do Voyager lazy interrupt
+ * handling here: It is an extremely expensive operation to mask an
+ * interrupt in the vic, so we merely set a flag (IRQ_DISABLED).  If
+ * this interrupt actually comes in, then we mask and ack here to push
+ * the interrupt off to another CPU */
+static void
+before_handle_vic_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	__u8 cpu = smp_processor_id();
+
+	_raw_spin_lock(&vic_irq_lock);
+	vic_intr_total++;
+	vic_intr_count[cpu]++;
+
+	if(!(cpu_irq_affinity[cpu] & (1<<irq))) {
+		/* The irq is not in our affinity mask, push it off
+		 * onto another CPU */
+		VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d on cpu %d\n",
+			irq, cpu));
+		disable_local_vic_irq(irq);
+		/* set IRQ_INPROGRESS to prevent the handler in irq.c from
+		 * actually calling the interrupt routine */
+		desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
+	} else if(desc->status & IRQ_DISABLED) {
+		/* Damn, the interrupt actually arrived, do the lazy
+		 * disable thing. The interrupt routine in irq.c will
+		 * not handle a IRQ_DISABLED interrupt, so nothing more
+		 * need be done here */
+		VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n",
+			irq, cpu));
+		disable_local_vic_irq(irq);
+		desc->status |= IRQ_REPLAY;
+	} else {
+		desc->status &= ~IRQ_REPLAY;
+	}
+
+	_raw_spin_unlock(&vic_irq_lock);
+}
+
+/* Finish the VIC interrupt: basically mask */
+static void
+after_handle_vic_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+
+	_raw_spin_lock(&vic_irq_lock);
+	{
+		unsigned int status = desc->status & ~IRQ_INPROGRESS;
+#ifdef VOYAGER_DEBUG
+		__u16 isr;
+#endif
+
+		desc->status = status;
+		if ((status & IRQ_DISABLED))
+			disable_local_vic_irq(irq);
+#ifdef VOYAGER_DEBUG
+		/* DEBUG: before we ack, check what's in progress */
+		isr = vic_read_isr();
+		if((isr & (1<<irq) && !(status & IRQ_REPLAY)) == 0) {
+			int i;
+			__u8 cpu = smp_processor_id();
+			__u8 real_cpu;
+			int mask; /* Um... initialize me??? --RR */
+
+			printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
+			       cpu, irq);
+			for_each_cpu(real_cpu, mask) {
+
+				outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
+				     VIC_PROCESSOR_ID);
+				isr = vic_read_isr();
+				if(isr & (1<<irq)) {
+					printk("VOYAGER SMP: CPU%d ack irq %d\n",
+					       real_cpu, irq);
+					ack_vic_irq(irq);
+				}
+				outb(cpu, VIC_PROCESSOR_ID);
+			}
+		}
+#endif /* VOYAGER_DEBUG */
+		/* as soon as we ack, the interrupt is eligible for
+		 * receipt by another CPU so everything must be in
+		 * order here  */
+		ack_vic_irq(irq);
+		if(status & IRQ_REPLAY) {
+			/* replay is set if we disable the interrupt
+			 * in the before_handle_vic_irq() routine, so
+			 * clear the in progress bit here to allow the
+			 * next CPU to handle this correctly */
+			desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS);
+		}
+#ifdef VOYAGER_DEBUG
+		isr = vic_read_isr();
+		if((isr & (1<<irq)) != 0)
+			printk("VOYAGER SMP: after_handle_vic_irq() after ack irq=%d, isr=0x%x\n",
+			       irq, isr);
+#endif /* VOYAGER_DEBUG */
+	}
+	_raw_spin_unlock(&vic_irq_lock);
+
+	/* All code after this point is out of the main path - the IRQ
+	 * may be intercepted by another CPU if reasserted */
+}
+
+
+/* Linux processor - interrupt affinity manipulations.
+ *
+ * For each processor, we maintain a 32 bit irq affinity mask.
+ * Initially it is set to all 1's so every processor accepts every
+ * interrupt.  In this call, we change the processor's affinity mask:
+ *
+ * Change from enable to disable:
+ *
+ * If the interrupt ever comes in to the processor, we will disable it
+ * and ack it to push it off to another CPU, so just accept the mask here.
+ *
+ * Change from disable to enable:
+ *
+ * change the mask and then do an interrupt enable CPI to re-enable on
+ * the selected processors */
+
+void
+set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	/* Only extended processors handle interrupts */
+	unsigned long real_mask;
+	unsigned long irq_mask = 1 << irq;
+	int cpu;
+
+	real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors;
+	
+	if(cpus_addr(mask)[0] == 0)
+		/* can't have no cpu's to accept the interrupt -- extremely
+		 * bad things will happen */
+		return;
+
+	if(irq == 0)
+		/* can't change the affinity of the timer IRQ.  This
+		 * is due to the constraint in the voyager
+		 * architecture that the CPI also comes in on and IRQ
+		 * line and we have chosen IRQ0 for this.  If you
+		 * raise the mask on this interrupt, the processor
+		 * will no-longer be able to accept VIC CPIs */
+		return;
+
+	if(irq >= 32) 
+		/* You can only have 32 interrupts in a voyager system
+		 * (and 32 only if you have a secondary microchannel
+		 * bus) */
+		return;
+
+	for_each_online_cpu(cpu) {
+		unsigned long cpu_mask = 1 << cpu;
+		
+		if(cpu_mask & real_mask) {
+			/* enable the interrupt for this cpu */
+			cpu_irq_affinity[cpu] |= irq_mask;
+		} else {
+			/* disable the interrupt for this cpu */
+			cpu_irq_affinity[cpu] &= ~irq_mask;
+		}
+	}
+	/* this is magic, we now have the correct affinity maps, so
+	 * enable the interrupt.  This will send an enable CPI to
+	 * those cpu's who need to enable it in their local masks,
+	 * causing them to correct for the new affinity . If the
+	 * interrupt is currently globally disabled, it will simply be
+	 * disabled again as it comes in (voyager lazy disable).  If
+	 * the affinity map is tightened to disable the interrupt on a
+	 * cpu, it will be pushed off when it comes in */
+	enable_vic_irq(irq);
+}
+
+static void
+ack_vic_irq(unsigned int irq)
+{
+	if (irq & 8) {
+		outb(0x62,0x20);	/* Specific EOI to cascade */
+		outb(0x60|(irq & 7),0xA0);
+	} else {
+		outb(0x60 | (irq & 7),0x20);
+	}
+}
+
+/* enable the CPIs.  In the VIC, the CPIs are delivered by the 8259
+ * but are not vectored by it.  This means that the 8259 mask must be
+ * lowered to receive them */
+static __init void
+vic_enable_cpi(void)
+{
+	__u8 cpu = smp_processor_id();
+	
+	/* just take a copy of the current mask (nop for boot cpu) */
+	vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
+
+	enable_local_vic_irq(VIC_CPI_LEVEL0);
+	enable_local_vic_irq(VIC_CPI_LEVEL1);
+	/* for sys int and cmn int */
+	enable_local_vic_irq(7);
+
+	if(is_cpu_quad()) {
+		outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
+		outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
+		VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
+			cpu, QIC_CPI_ENABLE));
+	}
+
+	VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n",
+		cpu, vic_irq_mask[cpu]));
+}
+
+void
+voyager_smp_dump()
+{
+	int old_cpu = smp_processor_id(), cpu;
+
+	/* dump the interrupt masks of each processor */
+	for_each_online_cpu(cpu) {
+		__u16 imr, isr, irr;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
+		imr = (inb(0xa1) << 8) | inb(0x21);
+		outb(0x0a, 0xa0);
+		irr = inb(0xa0) << 8;
+		outb(0x0a, 0x20);
+		irr |= inb(0x20);
+		outb(0x0b, 0xa0);
+		isr = inb(0xa0) << 8;
+		outb(0x0b, 0x20);
+		isr |= inb(0x20);
+		outb(old_cpu, VIC_PROCESSOR_ID);
+		local_irq_restore(flags);
+		printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n",
+		       cpu, vic_irq_mask[cpu], imr, irr, isr);
+#if 0
+		/* These lines are put in to try to unstick an un ack'd irq */
+		if(isr != 0) {
+			int irq;
+			for(irq=0; irq<16; irq++) {
+				if(isr & (1<<irq)) {
+					printk("\tCPU%d: ack irq %d\n",
+					       cpu, irq);
+					local_irq_save(flags);
+					outb(VIC_CPU_MASQUERADE_ENABLE | cpu,
+					     VIC_PROCESSOR_ID);
+					ack_vic_irq(irq);
+					outb(old_cpu, VIC_PROCESSOR_ID);
+					local_irq_restore(flags);
+				}
+			}
+		}
+#endif
+	}
+}
+
+void
+smp_voyager_power_off(void *dummy)
+{
+	if(smp_processor_id() == boot_cpu_id) 
+		voyager_power_off();
+	else
+		smp_stop_cpu_function(NULL);
+}
+
+void __init
+smp_prepare_cpus(unsigned int max_cpus)
+{
+	/* FIXME: ignore max_cpus for now */
+	smp_boot_cpus();
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+	cpu_set(smp_processor_id(), cpu_online_map);
+	cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit
+__cpu_up(unsigned int cpu)
+{
+	/* This only works at boot for x86.  See "rewrite" above. */
+	if (cpu_isset(cpu, smp_commenced_mask))
+		return -ENOSYS;
+
+	/* In case one didn't come up */
+	if (!cpu_isset(cpu, cpu_callin_map))
+		return -EIO;
+	/* Unleash the CPU! */
+	cpu_set(cpu, smp_commenced_mask);
+	while (!cpu_isset(cpu, cpu_online_map))
+		mb();
+	return 0;
+}
+
+void __init 
+smp_cpus_done(unsigned int max_cpus)
+{
+	zap_low_mappings();
+}
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
new file mode 100644
index 00000000000..9980eef31fd
--- /dev/null
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -0,0 +1,167 @@
+/* -*- mode: c; c-basic-offset: 8 -*- */
+
+/* Copyright (C) 2001
+ *
+ * Author: J.E.J.Bottomley@HansenPartnership.com
+ *
+ * linux/arch/i386/kernel/voyager_thread.c
+ *
+ * This module provides the machine status monitor thread for the
+ * voyager architecture.  This allows us to monitor the machine
+ * environment (temp, voltage, fan function) and the front panel and
+ * internal UPS.  If a fault is detected, this thread takes corrective
+ * action (usually just informing init)
+ * */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/kmod.h>
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <asm/desc.h>
+#include <asm/voyager.h>
+#include <asm/vic.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+
+#include <linux/irq.h>
+
+#define THREAD_NAME "kvoyagerd"
+
+/* external variables */
+int kvoyagerd_running = 0;
+DECLARE_MUTEX_LOCKED(kvoyagerd_sem);
+
+static int thread(void *);
+
+static __u8 set_timeout = 0;
+
+/* Start the machine monitor thread.  Return 1 if OK, 0 if fail */
+static int __init
+voyager_thread_start(void)
+{
+	if(kernel_thread(thread, NULL, CLONE_KERNEL) < 0) {
+		/* This is serious, but not fatal */
+		printk(KERN_ERR "Voyager: Failed to create system monitor thread!!!\n");
+		return 1;
+	}
+	return 0;
+}
+
+static int
+execute(const char *string)
+{
+	int ret;
+
+	char *envp[] = {
+		"HOME=/",
+		"TERM=linux",
+		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+		NULL,
+	};
+	char *argv[] = {
+		"/bin/bash",
+		"-c",
+		(char *)string,
+		NULL,
+	};
+
+	if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
+		printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
+		       string, ret);
+	}
+	return ret;
+}
+
+static void
+check_from_kernel(void)
+{
+	if(voyager_status.switch_off) {
+		
+		/* FIXME: This should be configureable via proc */
+		execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
+	} else if(voyager_status.power_fail) {
+		VDEBUG(("Voyager daemon detected AC power failure\n"));
+		
+		/* FIXME: This should be configureable via proc */
+		execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
+		set_timeout = 1;
+	}
+}
+
+static void
+check_continuing_condition(void)
+{
+	if(voyager_status.power_fail) {
+		__u8 data;
+		voyager_cat_psi(VOYAGER_PSI_SUBREAD, 
+				VOYAGER_PSI_AC_FAIL_REG, &data);
+		if((data & 0x1f) == 0) {
+			/* all power restored */
+			printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n");
+			/* FIXME: should be user configureable */
+			execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
+			set_timeout = 0;
+		}
+	}
+}
+
+static void
+wakeup(unsigned long unused)
+{
+	up(&kvoyagerd_sem);
+}
+
+static int
+thread(void *unused)
+{
+	struct timer_list wakeup_timer;
+
+	kvoyagerd_running = 1;
+
+	reparent_to_init();
+	daemonize(THREAD_NAME);
+
+	set_timeout = 0;
+
+	init_timer(&wakeup_timer);
+
+	sigfillset(&current->blocked);
+	current->signal->tty = NULL;
+
+	printk(KERN_NOTICE "Voyager starting monitor thread\n");
+
+	for(;;) {
+		down_interruptible(&kvoyagerd_sem);
+		VDEBUG(("Voyager Daemon awoken\n"));
+		if(voyager_status.request_from_kernel == 0) {
+			/* probably awoken from timeout */
+			check_continuing_condition();
+		} else {
+			check_from_kernel();
+			voyager_status.request_from_kernel = 0;
+		}
+		if(set_timeout) {
+			del_timer(&wakeup_timer);
+			wakeup_timer.expires = HZ + jiffies;
+			wakeup_timer.function = wakeup;
+			add_timer(&wakeup_timer);
+		}
+	}
+}
+
+static void __exit
+voyager_thread_stop(void)
+{
+	/* FIXME: do nothing at the moment */
+}
+
+module_init(voyager_thread_start);
+//module_exit(voyager_thread_stop);
diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile
new file mode 100644
index 00000000000..9c943fa6ce6
--- /dev/null
+++ b/arch/i386/math-emu/Makefile
@@ -0,0 +1,30 @@
+#
+#               Makefile for wm-FPU-emu
+#
+
+#DEBUG	= -DDEBUGGING
+DEBUG	=
+PARANOID = -DPARANOID
+CFLAGS	:= $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+
+EXTRA_AFLAGS	:= $(PARANOID)
+
+# From 'C' language sources:
+C_OBJS =fpu_entry.o errors.o \
+	fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \
+	load_store.o get_address.o \
+	poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
+	reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \
+	reg_ld_str.o reg_divide.o reg_mul.o
+
+# From 80x86 assembler sources:
+A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
+	div_small.o reg_norm.o reg_round.o \
+	wm_shrx.o wm_sqrt.o \
+	div_Xsig.o polynom_Xsig.o round_Xsig.o \
+	shr_Xsig.o mul_Xsig.o
+
+obj-y =$(C_OBJS) $(A_OBJS)
+
+proto:
+	cproto -e -DMAKING_PROTO *.c >fpu_proto.h
diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README
new file mode 100644
index 00000000000..e6235491d6e
--- /dev/null
+++ b/arch/i386/math-emu/README
@@ -0,0 +1,427 @@
+ +---------------------------------------------------------------------------+
+ |  wm-FPU-emu   an FPU emulator for 80386 and 80486SX microprocessors.      |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1995,1996,1997,1999                          |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@melbpc.org.au              |
+ |                                                                           |
+ |    This program is free software; you can redistribute it and/or modify   |
+ |    it under the terms of the GNU General Public License version 2 as      |
+ |    published by the Free Software Foundation.                             |
+ |                                                                           |
+ |    This program is distributed in the hope that it will be useful,        |
+ |    but WITHOUT ANY WARRANTY; without even the implied warranty of         |
+ |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          |
+ |    GNU General Public License for more details.                           |
+ |                                                                           |
+ |    You should have received a copy of the GNU General Public License      |
+ |    along with this program; if not, write to the Free Software            |
+ |    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              |
+ |                                                                           |
+ +---------------------------------------------------------------------------+
+
+
+
+wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
+which was my 80387 emulator for early versions of djgpp (gcc under
+msdos); wm-emu387 was in turn based upon emu387 which was written by
+DJ Delorie for djgpp.  The interface to the Linux kernel is based upon
+the original Linux math emulator by Linus Torvalds.
+
+My target FPU for wm-FPU-emu is that described in the Intel486
+Programmer's Reference Manual (1992 edition). Unfortunately, numerous
+facets of the functioning of the FPU are not well covered in the
+Reference Manual. The information in the manual has been supplemented
+with measurements on real 80486's. Unfortunately, it is simply not
+possible to be sure that all of the peculiarities of the 80486 have
+been discovered, so there is always likely to be obscure differences
+in the detailed behaviour of the emulator and a real 80486.
+
+wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
+but is very close.  See "Limitations" later in this file for a list of
+some differences.
+
+Please report bugs, etc to me at:
+       billm@melbpc.org.au
+or     b.metzenthen@medoto.unimelb.edu.au
+
+For more information on the emulator and on floating point topics, see
+my web pages, currently at  http://www.suburbia.net/~billm/
+
+
+--Bill Metzenthen
+  December 1999
+
+
+----------------------- Internals of wm-FPU-emu -----------------------
+
+Numeric algorithms:
+(1) Add, subtract, and multiply. Nothing remarkable in these.
+(2) Divide has been tuned to get reasonable performance. The algorithm
+    is not the obvious one which most people seem to use, but is designed
+    to take advantage of the characteristics of the 80386. I expect that
+    it has been invented many times before I discovered it, but I have not
+    seen it. It is based upon one of those ideas which one carries around
+    for years without ever bothering to check it out.
+(3) The sqrt function has been tuned to get good performance. It is based
+    upon Newton's classic method. Performance was improved by capitalizing
+    upon the properties of Newton's method, and the code is once again
+    structured taking account of the 80386 characteristics.
+(4) The trig, log, and exp functions are based in each case upon quasi-
+    "optimal" polynomial approximations. My definition of "optimal" was
+    based upon getting good accuracy with reasonable speed.
+(5) The argument reducing code for the trig function effectively uses
+    a value of pi which is accurate to more than 128 bits. As a consequence,
+    the reduced argument is accurate to more than 64 bits for arguments up
+    to a few pi, and accurate to more than 64 bits for most arguments,
+    even for arguments approaching 2^63. This is far superior to an
+    80486, which uses a value of pi which is accurate to 66 bits.
+
+The code of the emulator is complicated slightly by the need to
+account for a limited form of re-entrancy. Normally, the emulator will
+emulate each FPU instruction to completion without interruption.
+However, it may happen that when the emulator is accessing the user
+memory space, swapping may be needed. In this case the emulator may be
+temporarily suspended while disk i/o takes place. During this time
+another process may use the emulator, thereby perhaps changing static
+variables. The code which accesses user memory is confined to five
+files:
+    fpu_entry.c
+    reg_ld_str.c
+    load_store.c
+    get_address.c
+    errors.c
+As from version 1.12 of the emulator, no static variables are used
+(apart from those in the kernel's per-process tables). The emulator is
+therefore now fully re-entrant, rather than having just the restricted
+form of re-entrancy which is required by the Linux kernel.
+
+----------------------- Limitations of wm-FPU-emu -----------------------
+
+There are a number of differences between the current wm-FPU-emu
+(version 2.01) and the 80486 FPU (apart from bugs).  The differences
+are fewer than those which applied to the 1.xx series of the emulator.
+Some of the more important differences are listed below:
+
+The Roundup flag does not have much meaning for the transcendental
+functions and its 80486 value with these functions is likely to differ
+from its emulator value.
+
+In a few rare cases the Underflow flag obtained with the emulator will
+be different from that obtained with an 80486. This occurs when the
+following conditions apply simultaneously:
+(a) the operands have a higher precision than the current setting of the
+    precision control (PC) flags.
+(b) the underflow exception is masked.
+(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
+(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
+(e) the magnitude of the exact result would be exactly 2^-16382 if the
+    operands were rounded to the current precision before the arithmetic
+    operation was performed.
+If all of these apply, the emulator will set the Underflow flag but a real
+80486 will not.
+
+NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
+unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
+and Unnormals. None of these will be generated by an 80486 or by the
+emulator. Do not use them. The emulator treats them differently in
+detail from the way an 80486 does.
+
+Self modifying code can cause the emulator to fail. An example of such
+code is:
+          movl %esp,[%ebx]
+	  fld1
+The FPU instruction may be (usually will be) loaded into the pre-fetch
+queue of the CPU before the mov instruction is executed. If the
+destination of the 'movl' overlaps the FPU instruction then the bytes
+in the prefetch queue and memory will be inconsistent when the FPU
+instruction is executed. The emulator will be invoked but will not be
+able to find the instruction which caused the device-not-present
+exception. For this case, the emulator cannot emulate the behaviour of
+an 80486DX.
+
+Handling of the address size override prefix byte (0x67) has not been
+extensively tested yet. A major problem exists because using it in
+vm86 mode can cause a general protection fault. Address offsets
+greater than 0xffff appear to be illegal in vm86 mode but are quite
+acceptable (and work) in real mode. A small test program developed to
+check the addressing, and which runs successfully in real mode,
+crashes dosemu under Linux and also brings Windows down with a general
+protection fault message when run under the MS-DOS prompt of Windows
+3.1. (The program simply reads data from a valid address).
+
+The emulator supports 16-bit protected mode, with one difference from
+an 80486DX.  A 80486DX will allow some floating point instructions to
+write a few bytes below the lowest address of the stack.  The emulator
+will not allow this in 16-bit protected mode: no instructions are
+allowed to write outside the bounds set by the protection.
+
+----------------------- Performance of wm-FPU-emu -----------------------
+
+Speed.
+-----
+
+The speed of floating point computation with the emulator will depend
+upon instruction mix. Relative performance is best for the instructions
+which require most computation. The simple instructions are adversely
+affected by the FPU instruction trap overhead.
+
+
+Timing: Some simple timing tests have been made on the emulator functions.
+The times include load/store instructions. All times are in microseconds
+measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
+ms-dos, the next two columns are for emulators running with the djgpp
+ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
+using libm4.0 (hard).
+
+function      Turbo C        djgpp 1.06        WM-emu387     wm-FPU-emu
+
+   +          60.5           154.8              76.5          139.4
+   -          61.1-65.5      157.3-160.8        76.2-79.5     142.9-144.7
+   *          71.0           190.8              79.6          146.6
+   /          61.2-75.0      261.4-266.9        75.3-91.6     142.2-158.1
+
+ sin()        310.8          4692.0            319.0          398.5
+ cos()        284.4          4855.2            308.0          388.7
+ tan()        495.0          8807.1            394.9          504.7
+ atan()       328.9          4866.4            601.1          419.5-491.9
+
+ sqrt()       128.7          crashed           145.2          227.0
+ log()        413.1-419.1    5103.4-5354.21    254.7-282.2    409.4-437.1
+ exp()        479.1          6619.2            469.1          850.8
+
+
+The performance under Linux is improved by the use of look-ahead code.
+The following results show the improvement which is obtained under
+Linux due to the look-ahead code. Also given are the times for the
+original Linux emulator with the 4.1 'soft' lib.
+
+ [ Linus' note: I changed look-ahead to be the default under linux, as
+   there was no reason not to use it after I had edited it to be
+   disabled during tracing ]
+
+            wm-FPU-emu w     original w
+            look-ahead       'soft' lib
+   +         106.4             190.2
+   -         108.6-111.6      192.4-216.2
+   *         113.4             193.1
+   /         108.8-124.4      700.1-706.2
+
+ sin()       390.5            2642.0
+ cos()       381.5            2767.4
+ tan()       496.5            3153.3
+ atan()      367.2-435.5     2439.4-3396.8
+
+ sqrt()      195.1            4732.5
+ log()       358.0-387.5     3359.2-3390.3
+ exp()       619.3            4046.4
+
+
+These figures are now somewhat out-of-date. The emulator has become
+progressively slower for most functions as more of the 80486 features
+have been implemented.
+
+
+----------------------- Accuracy of wm-FPU-emu -----------------------
+
+
+The accuracy of the emulator is in almost all cases equal to or better
+than that of an Intel 80486 FPU.
+
+The results of the basic arithmetic functions (+,-,*,/), and fsqrt
+match those of an 80486 FPU. They are the best possible; the error for
+these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
+return exact results; they have no error.
+
+
+The following table compares the emulator accuracy for the sqrt(),
+trig and log functions against the Turbo C "emulator". For this table,
+each function was tested at about 400 points. Ideal worst-case results
+would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
+arguments greater than pi/4 can be thought of as being related to the
+precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
+accurate to 64 bits can result in a relative accuracy in cos() of
+about 64 + log2(cos(x)) = 31 bits.
+
+
+Function      Tested x range            Worst result                Turbo C
+                                        (relative bits)
+
+sqrt(x)       1 .. 2                    64.1                         63.2
+atan(x)       1e-10 .. 200              64.2                         62.8
+cos(x)        0 .. pi/2-(1e-10)         64.4 (x <= pi/4)             62.4
+                                        64.1 (x = pi/2-(1e-10))      31.9
+sin(x)        1e-10 .. pi/2             64.0                         62.8
+tan(x)        1e-10 .. pi/2-(1e-10)     64.0 (x <= pi/4)             62.1
+                                        64.1 (x = pi/2-(1e-10))      31.9
+exp(x)        0 .. 1                    63.1 **                      62.9
+log(x)        1+1e-6 .. 2               63.8 **                      62.1
+
+** The accuracy for exp() and log() is low because the FPU (emulator)
+does not compute them directly; two operations are required.
+
+
+The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
+later) for 'float' variables (24 bit precision numbers) when precision
+control is set to 24, 53 or 64 bits, and for 'double' variables (53
+bit precision numbers) when precision control is set to 53 bits (a
+properly performing FPU cannot pass the 'paranoia' tests for 'double'
+variables when precision control is set to 64 bits).
+
+The code for reducing the argument for the trig functions (fsin, fcos,
+fptan and fsincos) has been improved and now effectively uses a value
+for pi which is accurate to more than 128 bits precision. As a
+consequence, the accuracy of these functions for large arguments has
+been dramatically improved (and is now very much better than an 80486
+FPU). There is also now no degradation of accuracy for fcos and fptan
+for operands close to pi/2. Measured results are (note that the
+definition of accuracy has changed slightly from that used for the
+above table):
+
+Function      Tested x range          Worst result
+                                     (absolute bits)
+
+cos(x)        0 .. 9.22e+18              62.0
+sin(x)        1e-16 .. 9.22e+18          62.1
+tan(x)        1e-16 .. 9.22e+18          61.8
+
+It is possible with some effort to find very large arguments which
+give much degraded precision. For example, the integer number
+           8227740058411162616.0
+is within about 10e-7 of a multiple of pi. To find the tan (for
+example) of this number to 64 bits precision it would be necessary to
+have a value of pi which had about 150 bits precision. The FPU
+emulator computes the result to about 42.6 bits precision (the correct
+result is about -9.739715e-8). On the other hand, an 80486 FPU returns
+0.01059, which in relative terms is hopelessly inaccurate.
+
+For arguments close to critical angles (which occur at multiples of
+pi/2) the emulator is more accurate than an 80486 FPU. For very large
+arguments, the emulator is far more accurate.
+
+
+Prior to version 1.20 of the emulator, the accuracy of the results for
+the transcendental functions (in their principal range) was not as
+good as the results from an 80486 FPU. From version 1.20, the accuracy
+has been considerably improved and these functions now give measured
+worst-case results which are better than the worst-case results given
+by an 80486 FPU.
+
+The following table gives the measured results for the emulator. The
+number of randomly selected arguments in each case is about half a
+million.  The group of three columns gives the frequency of the given
+accuracy in number of times per million, thus the second of these
+columns shows that an accuracy of between 63.80 and 63.89 bits was
+found at a rate of 133 times per one million measurements for fsin.
+The results show that the fsin, fcos and fptan instructions return
+results which are in error (i.e. less accurate than the best possible
+result (which is 64 bits)) for about one per cent of all arguments
+between -pi/2 and +pi/2.  The other instructions have a lower
+frequency of results which are in error.  The last two columns give
+the worst accuracy which was found (in bits) and the approximate value
+of the argument which produced it.
+
+                                frequency (per M)
+                               -------------------   ---------------
+instr   arg range    # tests   63.7   63.8    63.9   worst   at arg
+                               bits   bits    bits    bits
+-----  ------------  -------   ----   ----   -----   -----  --------
+fsin     (0,pi/2)     547756      0    133   10673   63.89  0.451317
+fcos     (0,pi/2)     547563      0    126   10532   63.85  0.700801
+fptan    (0,pi/2)     536274     11    267   10059   63.74  0.784876
+fpatan  4 quadrants   517087      0      8    1855   63.88  0.435121 (4q)
+fyl2x     (0,20)      541861      0      0    1323   63.94  1.40923  (x)
+fyl2xp1 (-.293,.414)  520256      0      0    5678   63.93  0.408542 (x)
+f2xm1     (-1,1)      538847      4    481    6488   63.79  0.167709
+
+
+Tests performed on an 80486 FPU showed results of lower accuracy. The
+following table gives the results which were obtained with an AMD
+486DX2/66 (other tests indicate that an Intel 486DX produces
+identical results).  The tests were basically the same as those used
+to measure the emulator (the values, being random, were in general not
+the same).  The total number of tests for each instruction are given
+at the end of the table, in case each about 100k tests were performed.
+Another line of figures at the end of the table shows that most of the
+instructions return results which are in error for more than 10
+percent of the arguments tested.
+
+The numbers in the body of the table give the approx number of times a
+result of the given accuracy in bits (given in the left-most column)
+was obtained per one million arguments. For three of the instructions,
+two columns of results are given: * The second column for f2xm1 gives
+the number cases where the results of the first column were for a
+positive argument, this shows that this instruction gives better
+results for positive arguments than it does for negative.  * In the
+cases of fcos and fptan, the first column gives the results when all
+cases where arguments greater than 1.5 were removed from the results
+given in the second column. Unlike the emulator, an 80486 FPU returns
+results of relatively poor accuracy for these instructions when the
+argument approaches pi/2. The table does not show those cases when the
+accuracy of the results were less than 62 bits, which occurs quite
+often for fsin and fptan when the argument approaches pi/2. This poor
+accuracy is discussed above in relation to the Turbo C "emulator", and
+the accuracy of the value of pi.
+
+
+bits   f2xm1  f2xm1 fpatan   fcos   fcos  fyl2x fyl2xp1  fsin  fptan  fptan
+62.0       0      0      0      0    437      0      0      0      0    925
+62.1       0      0     10      0    894      0      0      0      0   1023
+62.2      14      0      0      0   1033      0      0      0      0    945
+62.3      57      0      0      0   1202      0      0      0      0   1023
+62.4     385      0      0     10   1292      0     23      0      0   1178
+62.5    1140      0      0    119   1649      0     39      0      0   1149
+62.6    2037      0      0    189   1620      0     16      0      0   1169
+62.7    5086     14      0    646   2315     10    101     35     39   1402
+62.8    8818     86      0    984   3050     59    287    131    224   2036
+62.9   11340   1355      0   2126   4153     79    605    357    321   1948
+63.0   15557   4750      0   3319   5376    246   1281    862    808   2688
+63.1   20016   8288      0   4620   6628    511   2569   1723   1510   3302
+63.2   24945  11127     10   6588   8098   1120   4470   2968   2990   4724
+63.3   25686  12382     69   8774  10682   1906   6775   4482   5474   7236
+63.4   29219  14722     79  11109  12311   3094   9414   7259   8912  10587
+63.5   30458  14936    393  13802  15014   5874  12666   9609  13762  15262
+63.6   32439  16448   1277  17945  19028  10226  15537  14657  19158  20346
+63.7   35031  16805   4067  23003  23947  18910  20116  21333  25001  26209
+63.8   33251  15820   7673  24781  25675  24617  25354  24440  29433  30329
+63.9   33293  16833  18529  28318  29233  31267  31470  27748  29676  30601
+
+Per cent with error:
+        30.9           3.2          18.5    9.8   13.1   11.6          17.4
+Total arguments tested:
+       70194  70099 101784 100641 100641 101799 128853 114893 102675 102675
+
+
+------------------------- Contributors -------------------------------
+
+A number of people have contributed to the development of the
+emulator, often by just reporting bugs, sometimes with suggested
+fixes, and a few kind people have provided me with access in one way
+or another to an 80486 machine. Contributors include (to those people
+who I may have forgotten, please forgive me):
+
+Linus Torvalds
+Tommy.Thorn@daimi.aau.dk
+Andrew.Tridgell@anu.edu.au
+Nick Holloway, alfie@dcs.warwick.ac.uk
+Hermano Moura, moura@dcs.gla.ac.uk
+Jon Jagger, J.Jagger@scp.ac.uk
+Lennart Benschop
+Brian Gallew, geek+@CMU.EDU
+Thomas Staniszewski, ts3v+@andrew.cmu.edu
+Martin Howell, mph@plasma.apana.org.au
+M Saggaf, alsaggaf@athena.mit.edu
+Peter Barker, PETER@socpsy.sci.fau.edu
+tom@vlsivie.tuwien.ac.at
+Dan Russel, russed@rpi.edu
+Daniel Carosone, danielce@ee.mu.oz.au
+cae@jpmorgan.com
+Hamish Coleman, t933093@minyos.xx.rmit.oz.au
+Bruce Evans, bde@kralizec.zeta.org.au
+Timo Korvola, Timo.Korvola@hut.fi
+Rick Lyons, rick@razorback.brisnet.org.au
+Rick, jrs@world.std.com
+ 
+...and numerous others who responded to my request for help with
+a real 80486.
+
diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h
new file mode 100644
index 00000000000..ae2274dbd30
--- /dev/null
+++ b/arch/i386/math-emu/control_w.h
@@ -0,0 +1,45 @@
+/*---------------------------------------------------------------------------+
+ |  control_w.h                                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _CONTROLW_H_
+#define _CONTROLW_H_
+
+#ifdef __ASSEMBLY__
+#define	_Const_(x)	$##x
+#else
+#define	_Const_(x)	x
+#endif
+
+#define CW_RC		_Const_(0x0C00)	/* rounding control */
+#define CW_PC		_Const_(0x0300)	/* precision control */
+
+#define CW_Precision	Const_(0x0020)	/* loss of precision mask */
+#define CW_Underflow	Const_(0x0010)	/* underflow mask */
+#define CW_Overflow	Const_(0x0008)	/* overflow mask */
+#define CW_ZeroDiv	Const_(0x0004)	/* divide by zero mask */
+#define CW_Denormal	Const_(0x0002)	/* denormalized operand mask */
+#define CW_Invalid	Const_(0x0001)	/* invalid operation mask */
+
+#define CW_Exceptions  	_Const_(0x003f)	/* all masks */
+
+#define RC_RND		_Const_(0x0000)
+#define RC_DOWN		_Const_(0x0400)
+#define RC_UP		_Const_(0x0800)
+#define RC_CHOP		_Const_(0x0C00)
+
+/* p 15-5: Precision control bits affect only the following:
+   ADD, SUB(R), MUL, DIV(R), and SQRT */
+#define PR_24_BITS        _Const_(0x000)
+#define PR_53_BITS        _Const_(0x200)
+#define PR_64_BITS        _Const_(0x300)
+#define PR_RESERVED_BITS  _Const_(0x100)
+/* FULL_PRECISION simulates all exceptions masked */
+#define FULL_PRECISION  (PR_64_BITS | RC_RND | 0x3f)
+
+#endif /* _CONTROLW_H_ */
diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S
new file mode 100644
index 00000000000..f77ba3058b3
--- /dev/null
+++ b/arch/i386/math-emu/div_Xsig.S
@@ -0,0 +1,365 @@
+	.file	"div_Xsig.S"
+/*---------------------------------------------------------------------------+
+ |  div_Xsig.S                                                               |
+ |                                                                           |
+ | Division subroutine for 96 bit quantities                                 |
+ |                                                                           |
+ | Copyright (C) 1994,1995                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
+ | put the 96 bit result at the location d.                                  |
+ |                                                                           |
+ | The result may not be accurate to 96 bits. It is intended for use where   |
+ | a result better than 64 bits is required. The result should usually be    |
+ | good to at least 94 bits.                                                 |
+ | The returned result is actually divided by one half. This is done to      |
+ | prevent overflow.                                                         |
+ |                                                                           |
+ |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
+ |                                                                           |
+ |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_emu.h"
+
+
+#define	XsigLL(x)	(x)
+#define	XsigL(x)	4(x)
+#define	XsigH(x)	8(x)
+
+
+#ifndef NON_REENTRANT_FPU
+/*
+	Local storage on the stack:
+	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+ */
+#define FPU_accum_3	-4(%ebp)
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+#define FPU_result_3	-20(%ebp)
+#define FPU_result_2	-24(%ebp)
+#define FPU_result_1	-28(%ebp)
+
+#else
+.data
+/*
+	Local storage in a static area:
+	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+ */
+	.align 4,0
+FPU_accum_3:
+	.long	0
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+FPU_result_3:
+	.long	0
+FPU_result_2:
+	.long	0
+FPU_result_1:
+	.long	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+ENTRY(div_Xsig)
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp
+#endif /* NON_REENTRANT_FPU */ 
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* pointer to num */
+	movl	PARAM2,%ebx	/* pointer to denom */
+
+#ifdef PARANOID
+	testl	$0x80000000, XsigH(%ebx)	/* Divisor */
+	je	L_bugged
+#endif /* PARANOID */
+
+
+/*---------------------------------------------------------------------------+
+ |  Divide:   Return  arg1/arg2 to arg3.                                     |
+ |                                                                           |
+ |  The maximum returned value is (ignoring exponents)                       |
+ |               .ffffffff ffffffff                                          |
+ |               ------------------  =  1.ffffffff fffffffe                  |
+ |               .80000000 00000000                                          |
+ | and the minimum is                                                        |
+ |               .80000000 00000000                                          |
+ |               ------------------  =  .80000000 00000001   (rounded)       |
+ |               .ffffffff ffffffff                                          |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+	/* Save extended dividend in local register */
+
+	/* Divide by 2 to prevent overflow */
+	clc
+	movl	XsigH(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_3
+	movl	XsigL(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_2
+	movl	XsigLL(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_1
+	movl	$0,%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_0
+
+	movl	FPU_accum_2,%eax	/* Get the current num */
+	movl	FPU_accum_3,%edx
+
+/*----------------------------------------------------------------------*/
+/* Initialization done.
+   Do the first 32 bits. */
+
+	/* We will divide by a number which is too large */
+	movl	XsigH(%ebx),%ecx
+	addl	$1,%ecx
+	jnc	LFirst_div_not_1
+
+	/* here we need to divide by 100000000h,
+	   i.e., no division at all.. */
+	mov	%edx,%eax
+	jmp	LFirst_div_done
+
+LFirst_div_not_1:
+	divl	%ecx		/* Divide the numerator by the augmented
+				   denom ms dw */
+
+LFirst_div_done:
+	movl	%eax,FPU_result_3	/* Put the result in the answer */
+
+	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_3
+
+	movl	FPU_result_3,%eax	/* Get the result back */
+	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+	sbbl	$0,FPU_accum_3
+	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
+
+#ifdef PARANOID
+	jb	L_bugged_1
+#endif /* PARANOID */ 
+
+	/* need to subtract another once of the denom */
+	incl	FPU_result_3	/* Correct the answer */
+
+	movl	XsigL(%ebx),%eax
+	movl	XsigH(%ebx),%edx
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	sbbl	$0,FPU_accum_3
+	jne	L_bugged_1	/* Must check for non-zero result here */
+#endif /* PARANOID */ 
+
+/*----------------------------------------------------------------------*/
+/* Half of the main problem is done, there is just a reduced numerator
+   to handle now.
+   Work with the second 32 bits, FPU_accum_0 not used from now on */
+LDo_2nd_32_bits:
+	movl	FPU_accum_2,%edx	/* get the reduced num */
+	movl	FPU_accum_1,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	XsigH(%ebx),%edx
+	jb	LDo_2nd_div
+	ja	LPrevent_2nd_overflow
+
+	cmpl	XsigL(%ebx),%eax
+	jb	LDo_2nd_div
+
+LPrevent_2nd_overflow:
+/* The numerator is greater or equal, would cause overflow */
+	/* prevent overflow */
+	subl	XsigL(%ebx),%eax
+	sbbl	XsigH(%ebx),%edx
+	movl	%edx,FPU_accum_2
+	movl	%eax,FPU_accum_1
+
+	incl	FPU_result_3	/* Reflect the subtraction in the answer */
+
+#ifdef PARANOID
+	je	L_bugged_2	/* Can't bump the result to 1.0 */
+#endif /* PARANOID */ 
+
+LDo_2nd_div:
+	cmpl	$0,%ecx		/* augmented denom msw */
+	jnz	LSecond_div_not_1
+
+	/* %ecx == 0, we are dividing by 1.0 */
+	mov	%edx,%eax
+	jmp	LSecond_div_done
+
+LSecond_div_not_1:
+	divl	%ecx		/* Divide the numerator by the denom ms dw */
+
+LSecond_div_done:
+	movl	%eax,FPU_result_2	/* Put the result in the answer */
+
+	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	movl	FPU_result_2,%eax	/* Get the result back */
+	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	jz	LDo_3rd_32_bits
+
+#ifdef PARANOID
+	cmpl	$1,FPU_accum_2
+	jne	L_bugged_2
+#endif /* PARANOID */ 
+
+	/* need to subtract another once of the denom */
+	movl	XsigL(%ebx),%eax
+	movl	XsigH(%ebx),%edx
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+	jne	L_bugged_2
+#endif /* PARANOID */ 
+
+	addl	$1,FPU_result_2	/* Correct the answer */
+	adcl	$0,FPU_result_3
+
+#ifdef PARANOID
+	jc	L_bugged_2	/* Must check for non-zero result here */
+#endif /* PARANOID */ 
+
+/*----------------------------------------------------------------------*/
+/* The division is essentially finished here, we just need to perform
+   tidying operations.
+   Deal with the 3rd 32 bits */
+LDo_3rd_32_bits:
+	/* We use an approximation for the third 32 bits.
+	To take account of the 3rd 32 bits of the divisor
+	(call them del), we subtract  del * (a/b) */
+
+	movl	FPU_result_3,%eax	/* a/b */
+	mull	XsigLL(%ebx)		/* del */
+
+	subl	%edx,FPU_accum_1
+
+	/* A borrow indicates that the result is negative */
+	jnb	LTest_over
+
+	movl	XsigH(%ebx),%edx
+	addl	%edx,FPU_accum_1
+
+	subl	$1,FPU_result_2		/* Adjust the answer */
+	sbbl	$0,FPU_result_3
+
+	/* The above addition might not have been enough, check again. */
+	movl	FPU_accum_1,%edx	/* get the reduced num */
+	cmpl	XsigH(%ebx),%edx	/* denom */
+	jb	LDo_3rd_div
+
+	movl	XsigH(%ebx),%edx
+	addl	%edx,FPU_accum_1
+
+	subl	$1,FPU_result_2		/* Adjust the answer */
+	sbbl	$0,FPU_result_3
+	jmp	LDo_3rd_div
+
+LTest_over:
+	movl	FPU_accum_1,%edx	/* get the reduced num */
+
+	/* need to check for possible subsequent overflow */
+	cmpl	XsigH(%ebx),%edx	/* denom */
+	jb	LDo_3rd_div
+
+	/* prevent overflow */
+	subl	XsigH(%ebx),%edx
+	movl	%edx,FPU_accum_1
+
+	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
+	adcl	$0,FPU_result_3
+
+LDo_3rd_div:
+	movl	FPU_accum_0,%eax
+	movl	FPU_accum_1,%edx
+	divl	XsigH(%ebx)
+
+	movl    %eax,FPU_result_1       /* Rough estimate of third word */
+
+	movl	PARAM3,%esi		/* pointer to answer */
+
+	movl	FPU_result_1,%eax
+	movl	%eax,XsigLL(%esi)
+	movl	FPU_result_2,%eax
+	movl	%eax,XsigL(%esi)
+	movl	FPU_result_3,%eax
+	movl	%eax,XsigH(%esi)
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	leave
+	ret
+
+
+#ifdef PARANOID
+/* The logic is wrong if we got here */
+L_bugged:
+	pushl	EX_INTERNAL|0x240
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_1:
+	pushl	EX_INTERNAL|0x241
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x242
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+#endif /* PARANOID */ 
diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S
new file mode 100644
index 00000000000..47099628fa4
--- /dev/null
+++ b/arch/i386/math-emu/div_small.S
@@ -0,0 +1,47 @@
+	.file	"div_small.S"
+/*---------------------------------------------------------------------------+
+ |  div_small.S                                                              |
+ |                                                                           |
+ | Divide a 64 bit integer by a 32 bit integer & return remainder.           |
+ |                                                                           |
+ | Copyright (C) 1992,1995                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |    unsigned long FPU_div_small(unsigned long long *x, unsigned long y)    |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+
+.text
+ENTRY(FPU_div_small)
+	pushl	%ebp
+	movl	%esp,%ebp
+
+	pushl	%esi
+
+	movl	PARAM1,%esi	/* pointer to num */
+	movl	PARAM2,%ecx	/* The denominator */
+
+	movl	4(%esi),%eax	/* Get the current num msw */
+	xorl	%edx,%edx
+	divl	%ecx
+
+	movl	%eax,4(%esi)
+
+	movl	(%esi),%eax	/* Get the num lsw */
+	divl	%ecx
+
+	movl	%eax,(%esi)
+
+	movl	%edx,%eax	/* Return the remainder in eax */
+
+	popl	%esi
+
+	leave
+	ret
+
diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c
new file mode 100644
index 00000000000..a1b0d22f697
--- /dev/null
+++ b/arch/i386/math-emu/errors.c
@@ -0,0 +1,739 @@
+/*---------------------------------------------------------------------------+
+ |  errors.c                                                                 |
+ |                                                                           |
+ |  The error handling functions for wm-FPU-emu                              |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1996                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@jacobi.maths.monash.edu.au                |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include <linux/signal.h>
+
+#include <asm/uaccess.h>
+
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "exception.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "reg_constant.h"
+#include "version.h"
+
+/* */
+#undef PRINT_MESSAGES
+/* */
+
+
+#if 0
+void Un_impl(void)
+{
+  u_char byte1, FPU_modrm;
+  unsigned long address = FPU_ORIG_EIP;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* No need to check access_ok(), we have previously fetched these bytes. */
+  printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
+  if ( FPU_CS == __USER_CS )
+    {
+      while ( 1 )
+	{
+	  FPU_get_user(byte1, (u_char __user *) address);
+	  if ( (byte1 & 0xf8) == 0xd8 ) break;
+	  printk("[%02x]", byte1);
+	  address++;
+	}
+      printk("%02x ", byte1);
+      FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
+      
+      if (FPU_modrm >= 0300)
+	printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
+      else
+	printk("/%d\n", (FPU_modrm >> 3) & 7);
+    }
+  else
+    {
+      printk("cs selector = %04x\n", FPU_CS);
+    }
+
+  RE_ENTRANT_CHECK_ON;
+
+  EXCEPTION(EX_Invalid);
+
+}
+#endif  /*  0  */
+
+
+/*
+   Called for opcodes which are illegal and which are known to result in a
+   SIGILL with a real 80486.
+   */
+void FPU_illegal(void)
+{
+  math_abort(FPU_info,SIGILL);
+}
+
+
+
+void FPU_printall(void)
+{
+  int i;
+  static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
+                              "DeNorm", "Inf", "NaN" };
+  u_char byte1, FPU_modrm;
+  unsigned long address = FPU_ORIG_EIP;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* No need to check access_ok(), we have previously fetched these bytes. */
+  printk("At %p:", (void *) address);
+  if ( FPU_CS == __USER_CS )
+    {
+#define MAX_PRINTED_BYTES 20
+      for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
+	{
+	  FPU_get_user(byte1, (u_char __user *) address);
+	  if ( (byte1 & 0xf8) == 0xd8 )
+	    {
+	      printk(" %02x", byte1);
+	      break;
+	    }
+	  printk(" [%02x]", byte1);
+	  address++;
+	}
+      if ( i == MAX_PRINTED_BYTES )
+	printk(" [more..]\n");
+      else
+	{
+	  FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
+	  
+	  if (FPU_modrm >= 0300)
+	    printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
+	  else
+	    printk(" /%d, mod=%d rm=%d\n",
+		   (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
+	}
+    }
+  else
+    {
+      printk("%04x\n", FPU_CS);
+    }
+
+  partial_status = status_word();
+
+#ifdef DEBUGGING
+if ( partial_status & SW_Backward )    printk("SW: backward compatibility\n");
+if ( partial_status & SW_C3 )          printk("SW: condition bit 3\n");
+if ( partial_status & SW_C2 )          printk("SW: condition bit 2\n");
+if ( partial_status & SW_C1 )          printk("SW: condition bit 1\n");
+if ( partial_status & SW_C0 )          printk("SW: condition bit 0\n");
+if ( partial_status & SW_Summary )     printk("SW: exception summary\n");
+if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
+if ( partial_status & SW_Precision )   printk("SW: loss of precision\n");
+if ( partial_status & SW_Underflow )   printk("SW: underflow\n");
+if ( partial_status & SW_Overflow )    printk("SW: overflow\n");
+if ( partial_status & SW_Zero_Div )    printk("SW: divide by zero\n");
+if ( partial_status & SW_Denorm_Op )   printk("SW: denormalized operand\n");
+if ( partial_status & SW_Invalid )     printk("SW: invalid operation\n");
+#endif /* DEBUGGING */
+
+  printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
+	 partial_status & 0x8000 ? 1 : 0,   /* busy */
+	 (partial_status & 0x3800) >> 11,   /* stack top pointer */
+	 partial_status & 0x80 ? 1 : 0,     /* Error summary status */
+	 partial_status & 0x40 ? 1 : 0,     /* Stack flag */
+	 partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
+	 partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
+	 partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
+	 partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
+	 partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
+  
+printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d     ef=%d%d%d%d%d%d\n",
+	 control_word & 0x1000 ? 1 : 0,
+	 (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
+	 (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
+	 control_word & 0x80 ? 1 : 0,
+	 control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
+	 control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
+	 control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
+
+  for ( i = 0; i < 8; i++ )
+    {
+      FPU_REG *r = &st(i);
+      u_char tagi = FPU_gettagi(i);
+      switch (tagi)
+	{
+	case TAG_Empty:
+	  continue;
+	  break;
+	case TAG_Zero:
+	case TAG_Special:
+	  tagi = FPU_Special(r);
+	case TAG_Valid:
+	  printk("st(%d)  %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
+		 getsign(r) ? '-' : '+',
+		 (long)(r->sigh >> 16),
+		 (long)(r->sigh & 0xFFFF),
+		 (long)(r->sigl >> 16),
+		 (long)(r->sigl & 0xFFFF),
+		 exponent(r) - EXP_BIAS + 1);
+	  break;
+	default:
+	  printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
+	  continue;
+	  break;
+	}
+      printk("%s\n", tag_desc[(int) (unsigned) tagi]);
+    }
+
+  RE_ENTRANT_CHECK_ON;
+
+}
+
+static struct {
+  int type;
+  const char *name;
+} exception_names[] = {
+  { EX_StackOver, "stack overflow" },
+  { EX_StackUnder, "stack underflow" },
+  { EX_Precision, "loss of precision" },
+  { EX_Underflow, "underflow" },
+  { EX_Overflow, "overflow" },
+  { EX_ZeroDiv, "divide by zero" },
+  { EX_Denormal, "denormalized operand" },
+  { EX_Invalid, "invalid operation" },
+  { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
+  { 0, NULL }
+};
+
+/*
+ EX_INTERNAL is always given with a code which indicates where the
+ error was detected.
+
+ Internal error types:
+       0x14   in fpu_etc.c
+       0x1nn  in a *.c file:
+              0x101  in reg_add_sub.c
+              0x102  in reg_mul.c
+              0x104  in poly_atan.c
+              0x105  in reg_mul.c
+              0x107  in fpu_trig.c
+	      0x108  in reg_compare.c
+	      0x109  in reg_compare.c
+	      0x110  in reg_add_sub.c
+	      0x111  in fpe_entry.c
+	      0x112  in fpu_trig.c
+	      0x113  in errors.c
+	      0x115  in fpu_trig.c
+	      0x116  in fpu_trig.c
+	      0x117  in fpu_trig.c
+	      0x118  in fpu_trig.c
+	      0x119  in fpu_trig.c
+	      0x120  in poly_atan.c
+	      0x121  in reg_compare.c
+	      0x122  in reg_compare.c
+	      0x123  in reg_compare.c
+	      0x125  in fpu_trig.c
+	      0x126  in fpu_entry.c
+	      0x127  in poly_2xm1.c
+	      0x128  in fpu_entry.c
+	      0x129  in fpu_entry.c
+	      0x130  in get_address.c
+	      0x131  in get_address.c
+	      0x132  in get_address.c
+	      0x133  in get_address.c
+	      0x140  in load_store.c
+	      0x141  in load_store.c
+              0x150  in poly_sin.c
+              0x151  in poly_sin.c
+	      0x160  in reg_ld_str.c
+	      0x161  in reg_ld_str.c
+	      0x162  in reg_ld_str.c
+	      0x163  in reg_ld_str.c
+	      0x164  in reg_ld_str.c
+	      0x170  in fpu_tags.c
+	      0x171  in fpu_tags.c
+	      0x172  in fpu_tags.c
+	      0x180  in reg_convert.c
+       0x2nn  in an *.S file:
+              0x201  in reg_u_add.S
+              0x202  in reg_u_div.S
+              0x203  in reg_u_div.S
+              0x204  in reg_u_div.S
+              0x205  in reg_u_mul.S
+              0x206  in reg_u_sub.S
+              0x207  in wm_sqrt.S
+	      0x208  in reg_div.S
+              0x209  in reg_u_sub.S
+              0x210  in reg_u_sub.S
+              0x211  in reg_u_sub.S
+              0x212  in reg_u_sub.S
+	      0x213  in wm_sqrt.S
+	      0x214  in wm_sqrt.S
+	      0x215  in wm_sqrt.S
+	      0x220  in reg_norm.S
+	      0x221  in reg_norm.S
+	      0x230  in reg_round.S
+	      0x231  in reg_round.S
+	      0x232  in reg_round.S
+	      0x233  in reg_round.S
+	      0x234  in reg_round.S
+	      0x235  in reg_round.S
+	      0x236  in reg_round.S
+	      0x240  in div_Xsig.S
+	      0x241  in div_Xsig.S
+	      0x242  in div_Xsig.S
+ */
+
+asmlinkage void FPU_exception(int n)
+{
+  int i, int_type;
+
+  int_type = 0;         /* Needed only to stop compiler warnings */
+  if ( n & EX_INTERNAL )
+    {
+      int_type = n - EX_INTERNAL;
+      n = EX_INTERNAL;
+      /* Set lots of exception bits! */
+      partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
+    }
+  else
+    {
+      /* Extract only the bits which we use to set the status word */
+      n &= (SW_Exc_Mask);
+      /* Set the corresponding exception bit */
+      partial_status |= n;
+      /* Set summary bits iff exception isn't masked */
+      if ( partial_status & ~control_word & CW_Exceptions )
+	partial_status |= (SW_Summary | SW_Backward);
+      if ( n & (SW_Stack_Fault | EX_Precision) )
+	{
+	  if ( !(n & SW_C1) )
+	    /* This bit distinguishes over- from underflow for a stack fault,
+	       and roundup from round-down for precision loss. */
+	    partial_status &= ~SW_C1;
+	}
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
+    {
+#ifdef PRINT_MESSAGES
+      /* My message from the sponsor */
+      printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
+#endif /* PRINT_MESSAGES */
+      
+      /* Get a name string for error reporting */
+      for (i=0; exception_names[i].type; i++)
+	if ( (exception_names[i].type & n) == exception_names[i].type )
+	  break;
+      
+      if (exception_names[i].type)
+	{
+#ifdef PRINT_MESSAGES
+	  printk("FP Exception: %s!\n", exception_names[i].name);
+#endif /* PRINT_MESSAGES */
+	}
+      else
+	printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
+      
+      if ( n == EX_INTERNAL )
+	{
+	  printk("FPU emulator: Internal error type 0x%04x\n", int_type);
+	  FPU_printall();
+	}
+#ifdef PRINT_MESSAGES
+      else
+	FPU_printall();
+#endif /* PRINT_MESSAGES */
+
+      /*
+       * The 80486 generates an interrupt on the next non-control FPU
+       * instruction. So we need some means of flagging it.
+       * We use the ES (Error Summary) bit for this.
+       */
+    }
+  RE_ENTRANT_CHECK_ON;
+
+#ifdef __DEBUG__
+  math_abort(FPU_info,SIGFPE);
+#endif /* __DEBUG__ */
+
+}
+
+
+/* Real operation attempted on a NaN. */
+/* Returns < 0 if the exception is unmasked */
+int real_1op_NaN(FPU_REG *a)
+{
+  int signalling, isNaN;
+
+  isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
+
+  /* The default result for the case of two "equal" NaNs (signs may
+     differ) is chosen to reproduce 80486 behaviour */
+  signalling = isNaN && !(a->sigh & 0x40000000);
+
+  if ( !signalling )
+    {
+      if ( !isNaN )  /* pseudo-NaN, or other unsupported? */
+	{
+	  if ( control_word & CW_Invalid )
+	    {
+	      /* Masked response */
+	      reg_copy(&CONST_QNaN, a);
+	    }
+	  EXCEPTION(EX_Invalid);
+	  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+	}
+      return TAG_Special;
+    }
+
+  if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      if ( !(a->sigh & 0x80000000) )  /* pseudo-NaN ? */
+	{
+	  reg_copy(&CONST_QNaN, a);
+	}
+      /* ensure a Quiet NaN */
+      a->sigh |= 0x40000000;
+    }
+
+  EXCEPTION(EX_Invalid);
+
+  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+}
+
+
+/* Real operation attempted on two operands, one a NaN. */
+/* Returns < 0 if the exception is unmasked */
+int real_2op_NaN(FPU_REG const *b, u_char tagb,
+		 int deststnr,
+		 FPU_REG const *defaultNaN)
+{
+  FPU_REG *dest = &st(deststnr);
+  FPU_REG const *a = dest;
+  u_char taga = FPU_gettagi(deststnr);
+  FPU_REG const *x;
+  int signalling, unsupported;
+
+  if ( taga == TAG_Special )
+    taga = FPU_Special(a);
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+
+  /* TW_NaN is also used for unsupported data types. */
+  unsupported = ((taga == TW_NaN)
+		 && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
+    || ((tagb == TW_NaN)
+	&& !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
+  if ( unsupported )
+    {
+      if ( control_word & CW_Invalid )
+	{
+	  /* Masked response */
+	  FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+	}
+      EXCEPTION(EX_Invalid);
+      return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+    }
+
+  if (taga == TW_NaN)
+    {
+      x = a;
+      if (tagb == TW_NaN)
+	{
+	  signalling = !(a->sigh & b->sigh & 0x40000000);
+	  if ( significand(b) > significand(a) )
+	    x = b;
+	  else if ( significand(b) == significand(a) )
+	    {
+	      /* The default result for the case of two "equal" NaNs (signs may
+		 differ) is chosen to reproduce 80486 behaviour */
+	      x = defaultNaN;
+	    }
+	}
+      else
+	{
+	  /* return the quiet version of the NaN in a */
+	  signalling = !(a->sigh & 0x40000000);
+	}
+    }
+  else
+#ifdef PARANOID
+    if (tagb == TW_NaN)
+#endif /* PARANOID */
+    {
+      signalling = !(b->sigh & 0x40000000);
+      x = b;
+    }
+#ifdef PARANOID
+  else
+    {
+      signalling = 0;
+      EXCEPTION(EX_INTERNAL|0x113);
+      x = &CONST_QNaN;
+    }
+#endif /* PARANOID */
+
+  if ( (!signalling) || (control_word & CW_Invalid) )
+    {
+      if ( ! x )
+	x = b;
+
+      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
+	x = &CONST_QNaN;
+
+      FPU_copy_to_regi(x, TAG_Special, deststnr);
+
+      if ( !signalling )
+	return TAG_Special;
+
+      /* ensure a Quiet NaN */
+      dest->sigh |= 0x40000000;
+    }
+
+  EXCEPTION(EX_Invalid);
+
+  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+}
+
+
+/* Invalid arith operation on Valid registers */
+/* Returns < 0 if the exception is unmasked */
+asmlinkage int arith_invalid(int deststnr)
+{
+
+  EXCEPTION(EX_Invalid);
+  
+  if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+    }
+  
+  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
+
+}
+
+
+/* Divide a finite number by zero */
+asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
+{
+  FPU_REG *dest = &st(deststnr);
+  int tag = TAG_Valid;
+
+  if ( control_word & CW_ZeroDiv )
+    {
+      /* The masked response */
+      FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
+      setsign(dest, sign);
+      tag = TAG_Special;
+    }
+ 
+  EXCEPTION(EX_ZeroDiv);
+
+  return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
+
+}
+
+
+/* This may be called often, so keep it lean */
+int set_precision_flag(int flags)
+{
+  if ( control_word & CW_Precision )
+    {
+      partial_status &= ~(SW_C1 & flags);
+      partial_status |= flags;   /* The masked response */
+      return 0;
+    }
+  else
+    {
+      EXCEPTION(flags);
+      return 1;
+    }
+}
+
+
+/* This may be called often, so keep it lean */
+asmlinkage void set_precision_flag_up(void)
+{
+  if ( control_word & CW_Precision )
+    partial_status |= (SW_Precision | SW_C1);   /* The masked response */
+  else
+    EXCEPTION(EX_Precision | SW_C1);
+}
+
+
+/* This may be called often, so keep it lean */
+asmlinkage void set_precision_flag_down(void)
+{
+  if ( control_word & CW_Precision )
+    {   /* The masked response */
+      partial_status &= ~SW_C1;
+      partial_status |= SW_Precision;
+    }
+  else
+    EXCEPTION(EX_Precision);
+}
+
+
+asmlinkage int denormal_operand(void)
+{
+  if ( control_word & CW_Denormal )
+    {   /* The masked response */
+      partial_status |= SW_Denorm_Op;
+      return TAG_Special;
+    }
+  else
+    {
+      EXCEPTION(EX_Denormal);
+      return TAG_Special | FPU_Exception;
+    }
+}
+
+
+asmlinkage int arith_overflow(FPU_REG *dest)
+{
+  int tag = TAG_Valid;
+
+  if ( control_word & CW_Overflow )
+    {
+      /* The masked response */
+/* ###### The response here depends upon the rounding mode */
+      reg_copy(&CONST_INF, dest);
+      tag = TAG_Special;
+    }
+  else
+    {
+      /* Subtract the magic number from the exponent */
+      addexponent(dest, (-3 * (1 << 13)));
+    }
+
+  EXCEPTION(EX_Overflow);
+  if ( control_word & CW_Overflow )
+    {
+      /* The overflow exception is masked. */
+      /* By definition, precision is lost.
+	 The roundup bit (C1) is also set because we have
+	 "rounded" upwards to Infinity. */
+      EXCEPTION(EX_Precision | SW_C1);
+      return tag;
+    }
+
+  return tag;
+
+}
+
+
+asmlinkage int arith_underflow(FPU_REG *dest)
+{
+  int tag = TAG_Valid;
+
+  if ( control_word & CW_Underflow )
+    {
+      /* The masked response */
+      if ( exponent16(dest) <= EXP_UNDER - 63 )
+	{
+	  reg_copy(&CONST_Z, dest);
+	  partial_status &= ~SW_C1;       /* Round down. */
+	  tag = TAG_Zero;
+	}
+      else
+	{
+	  stdexp(dest);
+	}
+    }
+  else
+    {
+      /* Add the magic number to the exponent. */
+      addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
+    }
+
+  EXCEPTION(EX_Underflow);
+  if ( control_word & CW_Underflow )
+    {
+      /* The underflow exception is masked. */
+      EXCEPTION(EX_Precision);
+      return tag;
+    }
+
+  return tag;
+
+}
+
+
+void FPU_stack_overflow(void)
+{
+
+ if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      top--;
+      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+    }
+
+  EXCEPTION(EX_StackOver);
+
+  return;
+
+}
+
+
+void FPU_stack_underflow(void)
+{
+
+ if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+    }
+
+  EXCEPTION(EX_StackUnder);
+
+  return;
+
+}
+
+
+void FPU_stack_underflow_i(int i)
+{
+
+ if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+    }
+
+  EXCEPTION(EX_StackUnder);
+
+  return;
+
+}
+
+
+void FPU_stack_underflow_pop(int i)
+{
+
+ if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+      FPU_pop();
+    }
+
+  EXCEPTION(EX_StackUnder);
+
+  return;
+
+}
+
diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h
new file mode 100644
index 00000000000..b463f21a811
--- /dev/null
+++ b/arch/i386/math-emu/exception.h
@@ -0,0 +1,53 @@
+/*---------------------------------------------------------------------------+
+ |  exception.h                                                              |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _EXCEPTION_H_
+#define _EXCEPTION_H_
+
+
+#ifdef __ASSEMBLY__
+#define	Const_(x)	$##x
+#else
+#define	Const_(x)	x
+#endif
+
+#ifndef SW_C1
+#include "fpu_emu.h"
+#endif /* SW_C1 */
+
+#define FPU_BUSY        Const_(0x8000)   /* FPU busy bit (8087 compatibility) */
+#define EX_ErrorSummary Const_(0x0080)   /* Error summary status */
+/* Special exceptions: */
+#define	EX_INTERNAL	Const_(0x8000)	/* Internal error in wm-FPU-emu */
+#define EX_StackOver	Const_(0x0041|SW_C1)	/* stack overflow */
+#define EX_StackUnder	Const_(0x0041)	/* stack underflow */
+/* Exception flags: */
+#define EX_Precision	Const_(0x0020)	/* loss of precision */
+#define EX_Underflow	Const_(0x0010)	/* underflow */
+#define EX_Overflow	Const_(0x0008)	/* overflow */
+#define EX_ZeroDiv	Const_(0x0004)	/* divide by zero */
+#define EX_Denormal	Const_(0x0002)	/* denormalized operand */
+#define EX_Invalid	Const_(0x0001)	/* invalid operation */
+
+
+#define PRECISION_LOST_UP    Const_((EX_Precision | SW_C1))
+#define PRECISION_LOST_DOWN  Const_(EX_Precision)
+
+
+#ifndef __ASSEMBLY__
+
+#ifdef DEBUG
+#define	EXCEPTION(x)	{ printk("exception in %s at line %d\n", \
+	__FILE__, __LINE__); FPU_exception(x); }
+#else
+#define	EXCEPTION(x)	FPU_exception(x)
+#endif
+
+#endif /* __ASSEMBLY__ */ 
+
+#endif /* _EXCEPTION_H_ */
diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c
new file mode 100644
index 00000000000..6972dec01af
--- /dev/null
+++ b/arch/i386/math-emu/fpu_arith.c
@@ -0,0 +1,174 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_arith.c                                                              |
+ |                                                                           |
+ | Code to implement the FPU register/register arithmetic instructions       |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1997                                              |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+void fadd__(void)
+{
+  /* fadd st,st(i) */
+  int i = FPU_rm;
+  clear_C1();
+  FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
+}
+
+
+void fmul__(void)
+{
+  /* fmul st,st(i) */
+  int i = FPU_rm;
+  clear_C1();
+  FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
+}
+
+
+
+void fsub__(void)
+{
+  /* fsub st,st(i) */
+  clear_C1();
+  FPU_sub(0, FPU_rm, control_word);
+}
+
+
+void fsubr_(void)
+{
+  /* fsubr st,st(i) */
+  clear_C1();
+  FPU_sub(REV, FPU_rm, control_word);
+}
+
+
+void fdiv__(void)
+{
+  /* fdiv st,st(i) */
+  clear_C1();
+  FPU_div(0, FPU_rm, control_word);
+}
+
+
+void fdivr_(void)
+{
+  /* fdivr st,st(i) */
+  clear_C1();
+  FPU_div(REV, FPU_rm, control_word);
+}
+
+
+
+void fadd_i(void)
+{
+  /* fadd st(i),st */
+  int i = FPU_rm;
+  clear_C1();
+  FPU_add(&st(i), FPU_gettagi(i), i, control_word);
+}
+
+
+void fmul_i(void)
+{
+  /* fmul st(i),st */
+  clear_C1();
+  FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
+}
+
+
+void fsubri(void)
+{
+  /* fsubr st(i),st */
+  clear_C1();
+  FPU_sub(DEST_RM, FPU_rm, control_word);
+}
+
+
+void fsub_i(void)
+{
+  /* fsub st(i),st */
+  clear_C1();
+  FPU_sub(REV|DEST_RM, FPU_rm, control_word);
+}
+
+
+void fdivri(void)
+{
+  /* fdivr st(i),st */
+  clear_C1();
+  FPU_div(DEST_RM, FPU_rm, control_word);
+}
+
+
+void fdiv_i(void)
+{
+  /* fdiv st(i),st */
+  clear_C1();
+  FPU_div(REV|DEST_RM, FPU_rm, control_word);
+}
+
+
+
+void faddp_(void)
+{
+  /* faddp st(i),st */
+  int i = FPU_rm;
+  clear_C1();
+  if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
+    FPU_pop();
+}
+
+
+void fmulp_(void)
+{
+  /* fmulp st(i),st */
+  clear_C1();
+  if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
+    FPU_pop();
+}
+
+
+
+void fsubrp(void)
+{
+  /* fsubrp st(i),st */
+  clear_C1();
+  if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
+    FPU_pop();
+}
+
+
+void fsubp_(void)
+{
+  /* fsubp st(i),st */
+  clear_C1();
+  if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
+    FPU_pop();
+}
+
+
+void fdivrp(void)
+{
+  /* fdivrp st(i),st */
+  clear_C1();
+  if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
+    FPU_pop();
+}
+
+
+void fdivp_(void)
+{
+  /* fdivp st(i),st */
+  clear_C1();
+  if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
+    FPU_pop();
+}
diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h
new file mode 100644
index 00000000000..9ba12416df1
--- /dev/null
+++ b/arch/i386/math-emu/fpu_asm.h
@@ -0,0 +1,32 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_asm.h                                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1995,1997                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _FPU_ASM_H_
+#define _FPU_ASM_H_
+
+#include <linux/linkage.h>
+
+#define	EXCEPTION	FPU_exception
+
+
+#define PARAM1	8(%ebp)
+#define	PARAM2	12(%ebp)
+#define	PARAM3	16(%ebp)
+#define	PARAM4	20(%ebp)
+#define	PARAM5	24(%ebp)
+#define	PARAM6	28(%ebp)
+#define	PARAM7	32(%ebp)
+
+#define SIGL_OFFSET 0
+#define	EXP(x)	8(x)
+#define SIG(x)	SIGL_OFFSET##(x)
+#define	SIGL(x)	SIGL_OFFSET##(x)
+#define	SIGH(x)	4(x)
+
+#endif /* _FPU_ASM_H_ */
diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c
new file mode 100644
index 00000000000..20886cfb9f7
--- /dev/null
+++ b/arch/i386/math-emu/fpu_aux.c
@@ -0,0 +1,204 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_aux.c                                                                |
+ |                                                                           |
+ | Code to implement some of the FPU auxiliary instructions.                 |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+
+
+static void fnop(void)
+{
+}
+
+static void fclex(void)
+{
+  partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
+		   SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
+		   SW_Invalid);
+  no_ip_update = 1;
+}
+
+/* Needs to be externally visible */
+void finit(void)
+{
+  control_word = 0x037f;
+  partial_status = 0;
+  top = 0;            /* We don't keep top in the status word internally. */
+  fpu_tag_word = 0xffff;
+  /* The behaviour is different from that detailed in
+     Section 15.1.6 of the Intel manual */
+  operand_address.offset = 0;
+  operand_address.selector = 0;
+  instruction_address.offset = 0;
+  instruction_address.selector = 0;
+  instruction_address.opcode = 0;
+  no_ip_update = 1;
+}
+
+/*
+ * These are nops on the i387..
+ */
+#define feni fnop
+#define fdisi fnop
+#define fsetpm fnop
+
+static FUNC const finit_table[] = {
+  feni, fdisi, fclex, finit,
+  fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void finit_(void)
+{
+  (finit_table[FPU_rm])();
+}
+
+
+static void fstsw_ax(void)
+{
+  *(short *) &FPU_EAX = status_word();
+  no_ip_update = 1;
+}
+
+static FUNC const fstsw_table[] = {
+  fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
+  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void fstsw_(void)
+{
+  (fstsw_table[FPU_rm])();
+}
+
+
+static FUNC const fp_nop_table[] = {
+  fnop, FPU_illegal, FPU_illegal, FPU_illegal,
+  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void fp_nop(void)
+{
+  (fp_nop_table[FPU_rm])();
+}
+
+
+void fld_i_(void)
+{
+  FPU_REG *st_new_ptr;
+  int i;
+  u_char tag;
+
+  if ( STACK_OVERFLOW )
+    { FPU_stack_overflow(); return; }
+
+  /* fld st(i) */
+  i = FPU_rm;
+  if ( NOT_EMPTY(i) )
+    {
+      reg_copy(&st(i), st_new_ptr);
+      tag = FPU_gettagi(i);
+      push();
+      FPU_settag0(tag);
+    }
+  else
+    {
+      if ( control_word & CW_Invalid )
+	{
+	  /* The masked response */
+	  FPU_stack_underflow();
+	}
+      else
+	EXCEPTION(EX_StackUnder);
+    }
+
+}
+
+
+void fxch_i(void)
+{
+  /* fxch st(i) */
+  FPU_REG t;
+  int i = FPU_rm;
+  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
+  long tag_word = fpu_tag_word;
+  int regnr = top & 7, regnri = ((regnr + i) & 7);
+  u_char st0_tag = (tag_word >> (regnr*2)) & 3;
+  u_char sti_tag = (tag_word >> (regnri*2)) & 3;
+
+  if ( st0_tag == TAG_Empty )
+    {
+      if ( sti_tag == TAG_Empty )
+	{
+	  FPU_stack_underflow();
+	  FPU_stack_underflow_i(i);
+	  return;
+	}
+      if ( control_word & CW_Invalid )
+	{
+	  /* Masked response */
+	  FPU_copy_to_reg0(sti_ptr, sti_tag);
+	}
+      FPU_stack_underflow_i(i);
+      return;
+    }
+  if ( sti_tag == TAG_Empty )
+    {
+      if ( control_word & CW_Invalid )
+	{
+	  /* Masked response */
+	  FPU_copy_to_regi(st0_ptr, st0_tag, i);
+	}
+      FPU_stack_underflow();
+      return;
+    }
+  clear_C1();
+
+  reg_copy(st0_ptr, &t);
+  reg_copy(sti_ptr, st0_ptr);
+  reg_copy(&t, sti_ptr);
+
+  tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
+  tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
+  fpu_tag_word = tag_word;
+}
+
+
+void ffree_(void)
+{
+  /* ffree st(i) */
+  FPU_settagi(FPU_rm, TAG_Empty);
+}
+
+
+void ffreep(void)
+{
+  /* ffree st(i) + pop - unofficial code */
+  FPU_settagi(FPU_rm, TAG_Empty);
+  FPU_pop();
+}
+
+
+void fst_i_(void)
+{
+  /* fst st(i) */
+  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+}
+
+
+void fstp_i(void)
+{
+  /* fstp st(i) */
+  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+  FPU_pop();
+}
+
diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h
new file mode 100644
index 00000000000..d62b20a3e66
--- /dev/null
+++ b/arch/i386/math-emu/fpu_emu.h
@@ -0,0 +1,217 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_emu.h                                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@suburbia.net             |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#ifndef _FPU_EMU_H_
+#define _FPU_EMU_H_
+
+/*
+ * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
+ * rather than behaviour which appears to be cleaner.
+ * This is a matter of opinion: for all I know, the 80486 may simply
+ * be complying with the IEEE spec. Maybe one day I'll get to see the
+ * spec...
+ */
+#define PECULIAR_486
+
+#ifdef __ASSEMBLY__
+#include "fpu_asm.h"
+#define	Const(x)	$##x
+#else
+#define	Const(x)	x
+#endif
+
+#define EXP_BIAS	Const(0)
+#define EXP_OVER	Const(0x4000)    /* smallest invalid large exponent */
+#define	EXP_UNDER	Const(-0x3fff)   /* largest invalid small exponent */
+#define EXP_WAY_UNDER   Const(-0x6000)   /* Below the smallest denormal, but
+					    still a 16 bit nr. */
+#define EXP_Infinity    EXP_OVER
+#define EXP_NaN         EXP_OVER
+
+#define EXTENDED_Ebias Const(0x3fff)
+#define EXTENDED_Emin (-0x3ffe)  /* smallest valid exponent */
+
+#define SIGN_POS	Const(0)
+#define SIGN_NEG	Const(0x80)
+
+#define SIGN_Positive	Const(0)
+#define SIGN_Negative	Const(0x8000)
+
+
+/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
+/* The following fold to 2 (Special) in the Tag Word */
+#define TW_Denormal     Const(4)        /* De-normal */
+#define TW_Infinity	Const(5)	/* + or - infinity */
+#define	TW_NaN		Const(6)	/* Not a Number */
+#define	TW_Unsupported	Const(7)	/* Not supported by an 80486 */
+
+#define TAG_Valid	Const(0)	/* valid */
+#define TAG_Zero	Const(1)	/* zero */
+#define TAG_Special	Const(2)	/* De-normal, + or - infinity,
+					   or Not a Number */
+#define TAG_Empty	Const(3)	/* empty */
+
+#define LOADED_DATA	Const(10101)	/* Special st() number to identify
+					   loaded data (not on stack). */
+
+/* A few flags (must be >= 0x10). */
+#define REV             0x10
+#define DEST_RM         0x20
+#define LOADED          0x40
+
+#define FPU_Exception   Const(0x80000000)   /* Added to tag returns. */
+
+
+#ifndef __ASSEMBLY__
+
+#include "fpu_system.h"
+
+#include <asm/sigcontext.h>   /* for struct _fpstate */
+#include <asm/math_emu.h>
+#include <linux/linkage.h>
+
+/*
+#define RE_ENTRANT_CHECKING
+ */
+
+#ifdef RE_ENTRANT_CHECKING
+extern u_char emulating;
+#  define RE_ENTRANT_CHECK_OFF emulating = 0
+#  define RE_ENTRANT_CHECK_ON emulating = 1
+#else
+#  define RE_ENTRANT_CHECK_OFF
+#  define RE_ENTRANT_CHECK_ON
+#endif /* RE_ENTRANT_CHECKING */
+
+#define FWAIT_OPCODE 0x9b
+#define OP_SIZE_PREFIX 0x66
+#define ADDR_SIZE_PREFIX 0x67
+#define PREFIX_CS 0x2e
+#define PREFIX_DS 0x3e
+#define PREFIX_ES 0x26
+#define PREFIX_SS 0x36
+#define PREFIX_FS 0x64
+#define PREFIX_GS 0x65
+#define PREFIX_REPE 0xf3
+#define PREFIX_REPNE 0xf2
+#define PREFIX_LOCK 0xf0
+#define PREFIX_CS_ 1
+#define PREFIX_DS_ 2
+#define PREFIX_ES_ 3
+#define PREFIX_FS_ 4
+#define PREFIX_GS_ 5
+#define PREFIX_SS_ 6
+#define PREFIX_DEFAULT 7
+
+struct address {
+  unsigned int offset;
+  unsigned int selector:16;
+  unsigned int opcode:11;
+  unsigned int empty:5;
+};
+struct fpu__reg {
+  unsigned sigl;
+  unsigned sigh;
+  short exp;
+};
+
+typedef void (*FUNC)(void);
+typedef struct fpu__reg FPU_REG;
+typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
+typedef struct { u_char address_size, operand_size, segment; }
+        overrides;
+/* This structure is 32 bits: */
+typedef struct { overrides override;
+		 u_char default_mode; } fpu_addr_modes;
+/* PROTECTED has a restricted meaning in the emulator; it is used
+   to signal that the emulator needs to do special things to ensure
+   that protection is respected in a segmented model. */
+#define PROTECTED 4
+#define SIXTEEN   1         /* We rely upon this being 1 (true) */
+#define VM86      SIXTEEN
+#define PM16      (SIXTEEN | PROTECTED)
+#define SEG32     PROTECTED
+extern u_char const data_sizes_16[32];
+
+#define register_base ((u_char *) registers )
+#define fpu_register(x)  ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) )
+#define	st(x)      ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) )
+
+#define	STACK_OVERFLOW	(FPU_stackoverflow(&st_new_ptr))
+#define	NOT_EMPTY(i)	(!FPU_empty_i(i))
+
+#define	NOT_EMPTY_ST0	(st0_tag ^ TAG_Empty)
+
+#define poppop() { FPU_pop(); FPU_pop(); }
+
+/* push() does not affect the tags */
+#define push()	{ top--; }
+
+#define signbyte(a) (((u_char *)(a))[9])
+#define getsign(a) (signbyte(a) & 0x80)
+#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
+#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
+                        else signbyte(b) &= 0x7f; }
+#define changesign(a) { signbyte(a) ^= 0x80; }
+#define setpositive(a) { signbyte(a) &= 0x7f; }
+#define setnegative(a) { signbyte(a) |= 0x80; }
+#define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
+#define signnegative(a) (signbyte(a) & 0x80)
+
+static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
+{
+  *(short *)&(y->exp) = *(const short *)&(x->exp); 
+  *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
+}
+
+#define exponent(x)  (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
+#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
+  ((y) + EXTENDED_Ebias) & 0x7fff; }
+#define exponent16(x)         (*(short *)&((x)->exp))
+#define setexponent16(x,y)  { (*(short *)&((x)->exp)) = (y); }
+#define addexponent(x,y)    { (*(short *)&((x)->exp)) += (y); }
+#define stdexp(x)           { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
+
+#define isdenormal(ptr)   (exponent(ptr) == EXP_BIAS+EXP_UNDER)
+
+#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
+
+
+/*----- Prototypes for functions written in assembler -----*/
+/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
+
+asmlinkage int FPU_normalize(FPU_REG *x);
+asmlinkage int FPU_normalize_nuo(FPU_REG *x);
+asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 int expa, int expb);
+asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 int expon);
+asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w, u_char sign);
+asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 int expa, int expb);
+asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
+		       unsigned int control_w, u_char sign);
+asmlinkage unsigned	FPU_shrx(void *l, unsigned x);
+asmlinkage unsigned	FPU_shrxs(void *v, unsigned x);
+asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
+asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
+			 unsigned int control_w, u_char sign);
+
+#ifndef MAKING_PROTO
+#include "fpu_proto.h"
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _FPU_EMU_H_ */
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
new file mode 100644
index 00000000000..d93f16ef828
--- /dev/null
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -0,0 +1,760 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_entry.c                                                              |
+ |                                                                           |
+ | The entry functions for wm-FPU-emu                                        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1996,1997                                    |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ | See the files "README" and "COPYING" for further copyright and warranty   |
+ | information.                                                              |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | math_emulate(), restore_i387_soft() and save_i387_soft() are the only     |
+ | entry points for wm-FPU-emu.                                              |
+ +---------------------------------------------------------------------------*/
+
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "exception.h"
+#include "control_w.h"
+#include "status_w.h"
+
+#define __BAD__ FPU_illegal   /* Illegal on an 80486, causes SIGILL */
+
+#ifndef NO_UNDOC_CODE    /* Un-documented FPU op-codes supported by default. */
+
+/* WARNING: These codes are not documented by Intel in their 80486 manual
+   and may not work on FPU clones or later Intel FPUs. */
+
+/* Changes to support the un-doc codes provided by Linus Torvalds. */
+
+#define _d9_d8_ fstp_i    /* unofficial code (19) */
+#define _dc_d0_ fcom_st   /* unofficial code (14) */
+#define _dc_d8_ fcompst   /* unofficial code (1c) */
+#define _dd_c8_ fxch_i    /* unofficial code (0d) */
+#define _de_d0_ fcompst   /* unofficial code (16) */
+#define _df_c0_ ffreep    /* unofficial code (07) ffree + pop */
+#define _df_c8_ fxch_i    /* unofficial code (0f) */
+#define _df_d0_ fstp_i    /* unofficial code (17) */
+#define _df_d8_ fstp_i    /* unofficial code (1f) */
+
+static FUNC const st_instr_table[64] = {
+  fadd__,   fld_i_,     __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  _df_c0_,
+  fmul__,   fxch_i,     __BAD__, __BAD__, fmul_i,  _dd_c8_, fmulp_,  _df_c8_,
+  fcom_st,  fp_nop,     __BAD__, __BAD__, _dc_d0_, fst_i_,  _de_d0_, _df_d0_,
+  fcompst,  _d9_d8_,    __BAD__, __BAD__, _dc_d8_, fstp_i,  fcompp,  _df_d8_,
+  fsub__,   FPU_etc,    __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
+  fsubr_,   fconst,     fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
+  fdiv__,   FPU_triga,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
+  fdivr_,   FPU_trigb,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+};
+
+#else     /* Support only documented FPU op-codes */
+
+static FUNC const st_instr_table[64] = {
+  fadd__,   fld_i_,     __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  __BAD__,
+  fmul__,   fxch_i,     __BAD__, __BAD__, fmul_i,  __BAD__, fmulp_,  __BAD__,
+  fcom_st,  fp_nop,     __BAD__, __BAD__, __BAD__, fst_i_,  __BAD__, __BAD__,
+  fcompst,  __BAD__,    __BAD__, __BAD__, __BAD__, fstp_i,  fcompp,  __BAD__,
+  fsub__,   FPU_etc,    __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
+  fsubr_,   fconst,     fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
+  fdiv__,   FPU_triga,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
+  fdivr_,   FPU_trigb,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+};
+
+#endif /* NO_UNDOC_CODE */
+
+
+#define _NONE_ 0   /* Take no special action */
+#define _REG0_ 1   /* Need to check for not empty st(0) */
+#define _REGI_ 2   /* Need to check for not empty st(0) and st(rm) */
+#define _REGi_ 0   /* Uses st(rm) */
+#define _PUSH_ 3   /* Need to check for space to push onto stack */
+#define _null_ 4   /* Function illegal or not implemented */
+#define _REGIi 5   /* Uses st(0) and st(rm), result to st(rm) */
+#define _REGIp 6   /* Uses st(0) and st(rm), result to st(rm) then pop */
+#define _REGIc 0   /* Compare st(0) and st(rm) */
+#define _REGIn 0   /* Uses st(0) and st(rm), but handle checks later */
+
+#ifndef NO_UNDOC_CODE
+
+/* Un-documented FPU op-codes supported by default. (see above) */
+
+static u_char const type_table[64] = {
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
+  _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
+  _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+  _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+};
+
+#else     /* Support only documented FPU op-codes */
+
+static u_char const type_table[64] = {
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
+  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
+  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
+  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+};
+
+#endif /* NO_UNDOC_CODE */
+
+
+#ifdef RE_ENTRANT_CHECKING
+u_char emulating=0;
+#endif /* RE_ENTRANT_CHECKING */
+
+static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
+			overrides *override);
+
+asmlinkage void math_emulate(long arg)
+{
+  u_char  FPU_modrm, byte1;
+  unsigned short code;
+  fpu_addr_modes addr_modes;
+  int unmasked;
+  FPU_REG loaded_data;
+  FPU_REG *st0_ptr;
+  u_char	  loaded_tag, st0_tag;
+  void __user *data_address;
+  struct address data_sel_off;
+  struct address entry_sel_off;
+  unsigned long code_base = 0;
+  unsigned long code_limit = 0;  /* Initialized to stop compiler warnings */
+  struct desc_struct code_descriptor;
+
+#ifdef RE_ENTRANT_CHECKING
+  if ( emulating )
+    {
+      printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
+    }
+  RE_ENTRANT_CHECK_ON;
+#endif /* RE_ENTRANT_CHECKING */
+
+  if (!used_math())
+    {
+      finit();
+      set_used_math();
+    }
+
+  SETUP_DATA_AREA(arg);
+
+  FPU_ORIG_EIP = FPU_EIP;
+
+  if ( (FPU_EFLAGS & 0x00020000) != 0 )
+    {
+      /* Virtual 8086 mode */
+      addr_modes.default_mode = VM86;
+      FPU_EIP += code_base = FPU_CS << 4;
+      code_limit = code_base + 0xffff;  /* Assumes code_base <= 0xffff0000 */
+    }
+  else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
+    {
+      addr_modes.default_mode = 0;
+    }
+  else if ( FPU_CS == __KERNEL_CS )
+    {
+      printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
+      panic("Math emulation needed in kernel");
+    }
+  else
+    {
+
+      if ( (FPU_CS & 4) != 4 )   /* Must be in the LDT */
+	{
+	  /* Can only handle segmented addressing via the LDT
+	     for now, and it must be 16 bit */
+	  printk("FPU emulator: Unsupported addressing mode\n");
+	  math_abort(FPU_info, SIGILL);
+	}
+
+      code_descriptor = LDT_DESCRIPTOR(FPU_CS);
+      if ( SEG_D_SIZE(code_descriptor) )
+	{
+	  /* The above test may be wrong, the book is not clear */
+	  /* Segmented 32 bit protected mode */
+	  addr_modes.default_mode = SEG32;
+	}
+      else
+	{
+	  /* 16 bit protected mode */
+	  addr_modes.default_mode = PM16;
+	}
+      FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
+      code_limit = code_base
+	+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
+	  - 1;
+      if ( code_limit < code_base ) code_limit = 0xffffffff;
+    }
+
+  FPU_lookahead = 1;
+  if (current->ptrace & PT_PTRACED)
+    FPU_lookahead = 0;
+
+  if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
+		     &addr_modes.override) )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
+	     "FPU emulator: self-modifying code! (emulation impossible)\n",
+	     byte1);
+      RE_ENTRANT_CHECK_ON;
+      EXCEPTION(EX_INTERNAL|0x126);
+      math_abort(FPU_info,SIGILL);
+    }
+
+do_another_FPU_instruction:
+
+  no_ip_update = 0;
+
+  FPU_EIP++;  /* We have fetched the prefix and first code bytes. */
+
+  if ( addr_modes.default_mode )
+    {
+      /* This checks for the minimum instruction bytes.
+	 We also need to check any extra (address mode) code access. */
+      if ( FPU_EIP > code_limit )
+	math_abort(FPU_info,SIGSEGV);
+    }
+
+  if ( (byte1 & 0xf8) != 0xd8 )
+    {
+      if ( byte1 == FWAIT_OPCODE )
+	{
+	  if (partial_status & SW_Summary)
+	    goto do_the_FPU_interrupt;
+	  else
+	    goto FPU_fwait_done;
+	}
+#ifdef PARANOID
+      EXCEPTION(EX_INTERNAL|0x128);
+      math_abort(FPU_info,SIGILL);
+#endif /* PARANOID */
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_access_ok(1);
+  FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
+  RE_ENTRANT_CHECK_ON;
+  FPU_EIP++;
+
+  if (partial_status & SW_Summary)
+    {
+      /* Ignore the error for now if the current instruction is a no-wait
+	 control instruction */
+      /* The 80486 manual contradicts itself on this topic,
+	 but a real 80486 uses the following instructions:
+	 fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
+       */
+      code = (FPU_modrm << 8) | byte1;
+      if ( ! ( (((code & 0xf803) == 0xe003) ||    /* fnclex, fninit, fnstsw */
+		(((code & 0x3003) == 0x3001) &&   /* fnsave, fnstcw, fnstenv,
+						     fnstsw */
+		 ((code & 0xc000) != 0xc000))) ) )
+	{
+	  /*
+	   *  We need to simulate the action of the kernel to FPU
+	   *  interrupts here.
+	   */
+	do_the_FPU_interrupt:
+
+	  FPU_EIP = FPU_ORIG_EIP;	/* Point to current FPU instruction. */
+
+	  RE_ENTRANT_CHECK_OFF;
+	  current->thread.trap_no = 16;
+	  current->thread.error_code = 0;
+	  send_sig(SIGFPE, current, 1);
+	  return;
+	}
+    }
+
+  entry_sel_off.offset = FPU_ORIG_EIP;
+  entry_sel_off.selector = FPU_CS;
+  entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
+
+  FPU_rm = FPU_modrm & 7;
+
+  if ( FPU_modrm < 0300 )
+    {
+      /* All of these instructions use the mod/rm byte to get a data address */
+
+      if ( (addr_modes.default_mode & SIXTEEN)
+	  ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
+	data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
+					  addr_modes);
+      else
+	data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
+				       addr_modes);
+
+      if ( addr_modes.default_mode )
+	{
+	  if ( FPU_EIP-1 > code_limit )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+
+      if ( !(byte1 & 1) )
+	{
+	  unsigned short status1 = partial_status;
+
+	  st0_ptr = &st(0);
+	  st0_tag = FPU_gettag0();
+
+	  /* Stack underflow has priority */
+	  if ( NOT_EMPTY_ST0 )
+	    {
+	      if ( addr_modes.default_mode & PROTECTED )
+		{
+		  /* This table works for 16 and 32 bit protected mode */
+		  if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
+		    math_abort(FPU_info,SIGSEGV);
+		}
+
+	      unmasked = 0;  /* Do this here to stop compiler warnings. */
+	      switch ( (byte1 >> 1) & 3 )
+		{
+		case 0:
+		  unmasked = FPU_load_single((float __user *)data_address,
+					     &loaded_data);
+		  loaded_tag = unmasked & 0xff;
+		  unmasked &= ~0xff;
+		  break;
+		case 1:
+		  loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
+		  break;
+		case 2:
+		  unmasked = FPU_load_double((double __user *)data_address,
+					     &loaded_data);
+		  loaded_tag = unmasked & 0xff;
+		  unmasked &= ~0xff;
+		  break;
+		case 3:
+		default:  /* Used here to suppress gcc warnings. */
+		  loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
+		  break;
+		}
+
+	      /* No more access to user memory, it is safe
+		 to use static data now */
+
+	      /* NaN operands have the next priority. */
+	      /* We have to delay looking at st(0) until after
+		 loading the data, because that data might contain an SNaN */
+	      if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
+		  ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
+		{
+		  /* Restore the status word; we might have loaded a
+		     denormal. */
+		  partial_status = status1;
+		  if ( (FPU_modrm & 0x30) == 0x10 )
+		    {
+		      /* fcom or fcomp */
+		      EXCEPTION(EX_Invalid);
+		      setcc(SW_C3 | SW_C2 | SW_C0);
+		      if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
+			FPU_pop();             /* fcomp, masked, so we pop. */
+		    }
+		  else
+		    {
+		      if ( loaded_tag == TAG_Special )
+			loaded_tag = FPU_Special(&loaded_data);
+#ifdef PECULIAR_486
+		      /* This is not really needed, but gives behaviour
+			 identical to an 80486 */
+		      if ( (FPU_modrm & 0x28) == 0x20 )
+			/* fdiv or fsub */
+			real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
+		      else
+#endif /* PECULIAR_486 */ 
+			/* fadd, fdivr, fmul, or fsubr */
+			real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
+		    }
+		  goto reg_mem_instr_done;
+		}
+
+	      if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
+		{
+		  /* Is not a comparison instruction. */
+		  if ( (FPU_modrm & 0x38) == 0x38 )
+		    {
+		      /* fdivr */
+		      if ( (st0_tag == TAG_Zero) &&
+			   ((loaded_tag == TAG_Valid)
+			    || (loaded_tag == TAG_Special
+				&& isdenormal(&loaded_data))) )
+			{
+			  if ( FPU_divide_by_zero(0, getsign(&loaded_data))
+			       < 0 )
+			    {
+			      /* We use the fact here that the unmasked
+				 exception in the loaded data was for a
+				 denormal operand */
+			      /* Restore the state of the denormal op bit */
+			      partial_status &= ~SW_Denorm_Op;
+			      partial_status |= status1 & SW_Denorm_Op;
+			    }
+			  else
+			    setsign(st0_ptr, getsign(&loaded_data));
+			}
+		    }
+		  goto reg_mem_instr_done;
+		}
+
+	      switch ( (FPU_modrm >> 3) & 7 )
+		{
+		case 0:         /* fadd */
+		  clear_C1();
+		  FPU_add(&loaded_data, loaded_tag, 0, control_word);
+		  break;
+		case 1:         /* fmul */
+		  clear_C1();
+		  FPU_mul(&loaded_data, loaded_tag, 0, control_word);
+		  break;
+		case 2:         /* fcom */
+		  FPU_compare_st_data(&loaded_data, loaded_tag);
+		  break;
+		case 3:         /* fcomp */
+		  if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
+		       && !unmasked )
+		    FPU_pop();
+		  break;
+		case 4:         /* fsub */
+		  clear_C1();
+		  FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
+		  break;
+		case 5:         /* fsubr */
+		  clear_C1();
+		  FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
+		  break;
+		case 6:         /* fdiv */
+		  clear_C1();
+		  FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
+		  break;
+		case 7:         /* fdivr */
+		  clear_C1();
+		  if ( st0_tag == TAG_Zero )
+		    partial_status = status1;  /* Undo any denorm tag,
+						  zero-divide has priority. */
+		  FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
+		  break;
+		}
+	    }
+	  else
+	    {
+	      if ( (FPU_modrm & 0x30) == 0x10 )
+		{
+		  /* The instruction is fcom or fcomp */
+		  EXCEPTION(EX_StackUnder);
+		  setcc(SW_C3 | SW_C2 | SW_C0);
+		  if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
+		    FPU_pop();             /* fcomp */
+		}
+	      else
+		FPU_stack_underflow();
+	    }
+	reg_mem_instr_done:
+	  operand_address = data_sel_off;
+	}
+      else
+	{
+	  if ( !(no_ip_update =
+		 FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
+				addr_modes, data_address)) )
+	    {
+	      operand_address = data_sel_off;
+	    }
+	}
+
+    }
+  else
+    {
+      /* None of these instructions access user memory */
+      u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
+
+#ifdef PECULIAR_486
+      /* This is supposed to be undefined, but a real 80486 seems
+	 to do this: */
+      operand_address.offset = 0;
+      operand_address.selector = FPU_DS;
+#endif /* PECULIAR_486 */
+
+      st0_ptr = &st(0);
+      st0_tag = FPU_gettag0();
+      switch ( type_table[(int) instr_index] )
+	{
+	case _NONE_:   /* also _REGIc: _REGIn */
+	  break;
+	case _REG0_:
+	  if ( !NOT_EMPTY_ST0 )
+	    {
+	      FPU_stack_underflow();
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGIi:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      FPU_stack_underflow_i(FPU_rm);
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGIp:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      FPU_stack_underflow_pop(FPU_rm);
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGI_:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      FPU_stack_underflow();
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _PUSH_:     /* Only used by the fld st(i) instruction */
+	  break;
+	case _null_:
+	  FPU_illegal();
+	  goto FPU_instruction_done;
+	default:
+	  EXCEPTION(EX_INTERNAL|0x111);
+	  goto FPU_instruction_done;
+	}
+      (*st_instr_table[(int) instr_index])();
+
+FPU_instruction_done:
+      ;
+    }
+
+  if ( ! no_ip_update )
+    instruction_address = entry_sel_off;
+
+FPU_fwait_done:
+
+#ifdef DEBUG
+  RE_ENTRANT_CHECK_OFF;
+  FPU_printall();
+  RE_ENTRANT_CHECK_ON;
+#endif /* DEBUG */
+
+  if (FPU_lookahead && !need_resched())
+    {
+      FPU_ORIG_EIP = FPU_EIP - code_base;
+      if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
+			&addr_modes.override) )
+	goto do_another_FPU_instruction;
+    }
+
+  if ( addr_modes.default_mode )
+    FPU_EIP -= code_base;
+
+  RE_ENTRANT_CHECK_OFF;
+}
+
+
+/* Support for prefix bytes is not yet complete. To properly handle
+   all prefix bytes, further changes are needed in the emulator code
+   which accesses user address space. Access to separate segments is
+   important for msdos emulation. */
+static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
+			overrides *override)
+{
+  u_char byte;
+  u_char __user *ip = *fpu_eip;
+
+  *override = (overrides) { 0, 0, PREFIX_DEFAULT };       /* defaults */
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_access_ok(1);
+  FPU_get_user(byte, ip);
+  RE_ENTRANT_CHECK_ON;
+
+  while ( 1 )
+    {
+      switch ( byte )
+	{
+	case ADDR_SIZE_PREFIX:
+	  override->address_size = ADDR_SIZE_PREFIX;
+	  goto do_next_byte;
+
+	case OP_SIZE_PREFIX:
+	  override->operand_size = OP_SIZE_PREFIX;
+	  goto do_next_byte;
+
+	case PREFIX_CS:
+	  override->segment = PREFIX_CS_;
+	  goto do_next_byte;
+	case PREFIX_ES:
+	  override->segment = PREFIX_ES_;
+	  goto do_next_byte;
+	case PREFIX_SS:
+	  override->segment = PREFIX_SS_;
+	  goto do_next_byte;
+	case PREFIX_FS:
+	  override->segment = PREFIX_FS_;
+	  goto do_next_byte;
+	case PREFIX_GS:
+	  override->segment = PREFIX_GS_;
+	  goto do_next_byte;
+	case PREFIX_DS:
+	  override->segment = PREFIX_DS_;
+	  goto do_next_byte;
+
+/* lock is not a valid prefix for FPU instructions,
+   let the cpu handle it to generate a SIGILL. */
+/*	case PREFIX_LOCK: */
+
+	  /* rep.. prefixes have no meaning for FPU instructions */
+	case PREFIX_REPE:
+	case PREFIX_REPNE:
+
+	do_next_byte:
+	  ip++;
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_access_ok(1);
+	  FPU_get_user(byte, ip);
+	  RE_ENTRANT_CHECK_ON;
+	  break;
+	case FWAIT_OPCODE:
+	  *Byte = byte;
+	  return 1;
+	default:
+	  if ( (byte & 0xf8) == 0xd8 )
+	    {
+	      *Byte = byte;
+	      *fpu_eip = ip;
+	      return 1;
+	    }
+	  else
+	    {
+	      /* Not a valid sequence of prefix bytes followed by
+		 an FPU instruction. */
+	      *Byte = byte;  /* Needed for error message. */
+	      return 0;
+	    }
+	}
+    }
+}
+
+
+void math_abort(struct info * info, unsigned int signal)
+{
+	FPU_EIP = FPU_ORIG_EIP;
+	current->thread.trap_no = 16;
+	current->thread.error_code = 0;
+	send_sig(signal,current,1);
+	RE_ENTRANT_CHECK_OFF;
+	__asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
+#ifdef PARANOID
+      printk("ERROR: wm-FPU-emu math_abort failed!\n");
+#endif /* PARANOID */
+}
+
+
+
+#define S387 ((struct i387_soft_struct *)s387)
+#define sstatus_word() \
+  ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
+
+int restore_i387_soft(void *s387, struct _fpstate __user *buf)
+{
+  u_char __user *d = (u_char __user *)buf;
+  int offset, other, i, tags, regnr, tag, newtop;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
+  if (__copy_from_user(&S387->cwd, d, 7*4))
+    return -1;
+  RE_ENTRANT_CHECK_ON;
+
+  d += 7*4;
+
+  S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
+  offset = (S387->ftop & 7) * 10;
+  other = 80 - offset;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* Copy all registers in stack order. */
+  if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
+    return -1;
+  if ( offset )
+    if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
+      return -1;
+  RE_ENTRANT_CHECK_ON;
+
+  /* The tags may need to be corrected now. */
+  tags = S387->twd;
+  newtop = S387->ftop;
+  for ( i = 0; i < 8; i++ )
+    {
+      regnr = (i+newtop) & 7;
+      if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
+	{
+	  /* The loaded data over-rides all other cases. */
+	  tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
+	  tags &= ~(3 << (regnr*2));
+	  tags |= (tag & 3) << (regnr*2);
+	}
+    }
+  S387->twd = tags;
+
+  return 0;
+}
+
+
+int save_i387_soft(void *s387, struct _fpstate __user * buf)
+{
+  u_char __user *d = (u_char __user *)buf;
+  int offset = (S387->ftop & 7) * 10, other = 80 - offset;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
+#ifdef PECULIAR_486
+  S387->cwd &= ~0xe080;
+  /* An 80486 sets nearly all of the reserved bits to 1. */
+  S387->cwd |= 0xffff0040;
+  S387->swd = sstatus_word() | 0xffff0000;
+  S387->twd |= 0xffff0000;
+  S387->fcs &= ~0xf8000000;
+  S387->fos |= 0xffff0000;
+#endif /* PECULIAR_486 */
+  __copy_to_user(d, &S387->cwd, 7*4);
+  RE_ENTRANT_CHECK_ON;
+
+  d += 7*4;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* Copy all registers in stack order. */
+  if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
+    return -1;
+  if ( offset )
+    if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
+      return -1
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c
new file mode 100644
index 00000000000..e3b5d465587
--- /dev/null
+++ b/arch/i386/math-emu/fpu_etc.c
@@ -0,0 +1,143 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_etc.c                                                                |
+ |                                                                           |
+ | Implement a few FPU instructions.                                         |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@suburbia.net             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "reg_constant.h"
+
+
+static void fchs(FPU_REG *st0_ptr, u_char st0tag)
+{
+  if ( st0tag ^ TAG_Empty )
+    {
+      signbyte(st0_ptr) ^= SIGN_NEG;
+      clear_C1();
+    }
+  else
+    FPU_stack_underflow();
+}
+
+
+static void fabs(FPU_REG *st0_ptr, u_char st0tag)
+{
+  if ( st0tag ^ TAG_Empty )
+    {
+      setpositive(st0_ptr);
+      clear_C1();
+    }
+  else
+    FPU_stack_underflow();
+}
+
+
+static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
+{
+  switch (st0tag)
+    {
+    case TAG_Zero:
+      setcc(SW_C3);
+      break;
+    case TAG_Valid:
+      if (getsign(st0_ptr) == SIGN_POS)
+        setcc(0);
+      else
+        setcc(SW_C0);
+      break;
+    case TAG_Special:
+      switch ( FPU_Special(st0_ptr) )
+	{
+	case TW_Denormal:
+	  if (getsign(st0_ptr) == SIGN_POS)
+	    setcc(0);
+	  else
+	    setcc(SW_C0);
+	  if ( denormal_operand() < 0 )
+	    {
+#ifdef PECULIAR_486
+	      /* This is weird! */
+	      if (getsign(st0_ptr) == SIGN_POS)
+		setcc(SW_C3);
+#endif /* PECULIAR_486 */
+	      return;
+	    }
+	  break;
+	case TW_NaN:
+	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
+	  EXCEPTION(EX_Invalid);
+	  break;
+	case TW_Infinity:
+	  if (getsign(st0_ptr) == SIGN_POS)
+	    setcc(0);
+	  else
+	    setcc(SW_C0);
+	  break;
+	default:
+	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
+	  EXCEPTION(EX_INTERNAL|0x14);
+	  break;
+	}
+      break;
+    case TAG_Empty:
+      setcc(SW_C0|SW_C2|SW_C3);
+      EXCEPTION(EX_StackUnder);
+      break;
+    }
+}
+
+
+static void fxam(FPU_REG *st0_ptr, u_char st0tag)
+{
+  int c = 0;
+  switch (st0tag)
+    {
+    case TAG_Empty:
+      c = SW_C3|SW_C0;
+      break;
+    case TAG_Zero:
+      c = SW_C3;
+      break;
+    case TAG_Valid:
+      c = SW_C2;
+      break;
+    case TAG_Special:
+      switch ( FPU_Special(st0_ptr) )
+	{
+	case TW_Denormal:
+	  c = SW_C2|SW_C3;  /* Denormal */
+	  break;
+	case TW_NaN:
+	  /* We also use NaN for unsupported types. */
+	  if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
+	    c = SW_C0;
+	  break;
+	case TW_Infinity:
+	  c = SW_C2|SW_C0;
+	  break;
+	}
+    }
+  if ( getsign(st0_ptr) == SIGN_NEG )
+    c |= SW_C1;
+  setcc(c);
+}
+
+
+static FUNC_ST0 const fp_etc_table[] = {
+  fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
+  ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
+};
+
+void FPU_etc(void)
+{
+  (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
+}
diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h
new file mode 100644
index 00000000000..37a8a7fe7e2
--- /dev/null
+++ b/arch/i386/math-emu/fpu_proto.h
@@ -0,0 +1,140 @@
+#ifndef _FPU_PROTO_H
+#define _FPU_PROTO_H
+
+/* errors.c */
+extern void FPU_illegal(void);
+extern void FPU_printall(void);
+asmlinkage void FPU_exception(int n);
+extern int real_1op_NaN(FPU_REG *a);
+extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
+			FPU_REG const *defaultNaN);
+asmlinkage int arith_invalid(int deststnr);
+asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign);
+extern int set_precision_flag(int flags);
+asmlinkage void set_precision_flag_up(void);
+asmlinkage void set_precision_flag_down(void);
+asmlinkage int denormal_operand(void);
+asmlinkage int arith_overflow(FPU_REG *dest);
+asmlinkage int arith_underflow(FPU_REG *dest);
+extern void FPU_stack_overflow(void);
+extern void FPU_stack_underflow(void);
+extern void FPU_stack_underflow_i(int i);
+extern void FPU_stack_underflow_pop(int i);
+/* fpu_arith.c */
+extern void fadd__(void);
+extern void fmul__(void);
+extern void fsub__(void);
+extern void fsubr_(void);
+extern void fdiv__(void);
+extern void fdivr_(void);
+extern void fadd_i(void);
+extern void fmul_i(void);
+extern void fsubri(void);
+extern void fsub_i(void);
+extern void fdivri(void);
+extern void fdiv_i(void);
+extern void faddp_(void);
+extern void fmulp_(void);
+extern void fsubrp(void);
+extern void fsubp_(void);
+extern void fdivrp(void);
+extern void fdivp_(void);
+/* fpu_aux.c */
+extern void finit(void);
+extern void finit_(void);
+extern void fstsw_(void);
+extern void fp_nop(void);
+extern void fld_i_(void);
+extern void fxch_i(void);
+extern void ffree_(void);
+extern void ffreep(void);
+extern void fst_i_(void);
+extern void fstp_i(void);
+/* fpu_entry.c */
+asmlinkage extern void math_emulate(long arg);
+extern void math_abort(struct info *info, unsigned int signal);
+/* fpu_etc.c */
+extern void FPU_etc(void);
+/* fpu_tags.c */
+extern int FPU_gettag0(void);
+extern int FPU_gettagi(int stnr);
+extern int FPU_gettag(int regnr);
+extern void FPU_settag0(int tag);
+extern void FPU_settagi(int stnr, int tag);
+extern void FPU_settag(int regnr, int tag);
+extern int FPU_Special(FPU_REG const *ptr);
+extern int isNaN(FPU_REG const *ptr);
+extern void FPU_pop(void);
+extern int FPU_empty_i(int stnr);
+extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
+extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
+extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
+extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
+/* fpu_trig.c */
+extern void FPU_triga(void);
+extern void FPU_trigb(void);
+/* get_address.c */
+extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
+			 struct address *addr, fpu_addr_modes addr_modes);
+extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
+			    struct address *addr, fpu_addr_modes addr_modes);
+/* load_store.c */
+extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
+			    void __user *data_address);
+/* poly_2xm1.c */
+extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
+/* poly_atan.c */
+extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
+		      u_char st1_tag);
+/* poly_l2.c */
+extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
+extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
+		     FPU_REG *d);
+/* poly_sin.c */
+extern void poly_sine(FPU_REG *st0_ptr);
+extern void poly_cos(FPU_REG *st0_ptr);
+/* poly_tan.c */
+extern void poly_tan(FPU_REG *st0_ptr);
+/* reg_add_sub.c */
+extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
+extern int FPU_sub(int flags, int rm, int control_w);
+/* reg_compare.c */
+extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag);
+extern void fcom_st(void);
+extern void fcompst(void);
+extern void fcompp(void);
+extern void fucom_(void);
+extern void fucomp(void);
+extern void fucompp(void);
+/* reg_constant.c */
+extern void fconst(void);
+/* reg_ld_str.c */
+extern int FPU_load_extended(long double __user *s, int stnr);
+extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
+extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
+extern int FPU_load_int64(long long __user *_s);
+extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
+extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
+extern int FPU_load_bcd(u_char __user *s);
+extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
+			      long double __user *d);
+extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
+extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
+extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
+extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
+extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
+extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
+extern int FPU_round_to_int(FPU_REG *r, u_char tag);
+extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
+extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
+extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
+extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
+extern int FPU_tagof(FPU_REG *ptr);
+/* reg_mul.c */
+extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
+
+extern int FPU_div(int flags, int regrm, int control_w);
+/* reg_convert.c */
+extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
+#endif /* _FPU_PROTO_H */
+
diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h
new file mode 100644
index 00000000000..bf26341c8bd
--- /dev/null
+++ b/arch/i386/math-emu/fpu_system.h
@@ -0,0 +1,89 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_system.h                                                             |
+ |                                                                           |
+ | Copyright (C) 1992,1994,1997                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@suburbia.net             |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _FPU_SYSTEM_H
+#define _FPU_SYSTEM_H
+
+/* system dependent definitions */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+/* This sets the pointer FPU_info to point to the argument part
+   of the stack frame of math_emulate() */
+#define SETUP_DATA_AREA(arg)	FPU_info = (struct info *) &arg
+
+/* s is always from a cpu register, and the cpu does bounds checking
+ * during register load --> no further bounds checks needed */
+#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#define SEG_D_SIZE(x)		((x).b & (3 << 21))
+#define SEG_G_BIT(x)		((x).b & (1 << 23))
+#define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
+#define SEG_286_MODE(x)		((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
+#define SEG_BASE_ADDR(s)	(((s).b & 0xff000000) \
+				 | (((s).b & 0xff) << 16) | ((s).a >> 16))
+#define SEG_LIMIT(s)		(((s).b & 0xff0000) | ((s).a & 0xffff))
+#define SEG_EXECUTE_ONLY(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
+#define SEG_WRITE_PERM(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
+#define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
+				 == (1 << 10))
+
+#define I387			(current->thread.i387)
+#define FPU_info		(I387.soft.info)
+
+#define FPU_CS			(*(unsigned short *) &(FPU_info->___cs))
+#define FPU_SS			(*(unsigned short *) &(FPU_info->___ss))
+#define FPU_DS			(*(unsigned short *) &(FPU_info->___ds))
+#define FPU_EAX			(FPU_info->___eax)
+#define FPU_EFLAGS		(FPU_info->___eflags)
+#define FPU_EIP			(FPU_info->___eip)
+#define FPU_ORIG_EIP		(FPU_info->___orig_eip)
+
+#define FPU_lookahead           (I387.soft.lookahead)
+
+/* nz if ip_offset and cs_selector are not to be set for the current
+   instruction. */
+#define no_ip_update		(*(u_char *)&(I387.soft.no_update))
+#define FPU_rm			(*(u_char *)&(I387.soft.rm))
+
+/* Number of bytes of data which can be legally accessed by the current
+   instruction. This only needs to hold a number <= 108, so a byte will do. */
+#define access_limit		(*(u_char *)&(I387.soft.alimit))
+
+#define partial_status		(I387.soft.swd)
+#define control_word		(I387.soft.cwd)
+#define fpu_tag_word		(I387.soft.twd)
+#define registers		(I387.soft.st_space)
+#define top			(I387.soft.ftop)
+
+#define instruction_address	(*(struct address *)&I387.soft.fip)
+#define operand_address		(*(struct address *)&I387.soft.foo)
+
+#define FPU_access_ok(x,y,z)	if ( !access_ok(x,y,z) ) \
+				math_abort(FPU_info,SIGSEGV)
+
+#undef FPU_IGNORE_CODE_SEGV
+#ifdef FPU_IGNORE_CODE_SEGV
+/* access_ok() is very expensive, and causes the emulator to run
+   about 20% slower if applied to the code. Anyway, errors due to bad
+   code addresses should be much rarer than errors due to bad data
+   addresses. */
+#define	FPU_code_access_ok(z)
+#else
+/* A simpler test than access_ok() can probably be done for
+   FPU_code_access_ok() because the only possible error is to step
+   past the upper boundary of a legal code area. */
+#define	FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z)
+#endif
+
+#define FPU_get_user(x,y)       get_user((x),(y))
+#define FPU_put_user(x,y)       put_user((x),(y))
+
+#endif
diff --git a/arch/i386/math-emu/fpu_tags.c b/arch/i386/math-emu/fpu_tags.c
new file mode 100644
index 00000000000..cb436fe20e4
--- /dev/null
+++ b/arch/i386/math-emu/fpu_tags.c
@@ -0,0 +1,127 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_tags.c                                                               |
+ |                                                                           |
+ |  Set FPU register tags.                                                   |
+ |                                                                           |
+ | Copyright (C) 1997                                                        |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@jacobi.maths.monash.edu.au                |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "exception.h"
+
+
+void FPU_pop(void)
+{
+  fpu_tag_word |= 3 << ((top & 7)*2);
+  top++;
+}
+
+
+int FPU_gettag0(void)
+{
+  return (fpu_tag_word >> ((top & 7)*2)) & 3;
+}
+
+
+int FPU_gettagi(int stnr)
+{
+  return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
+}
+
+
+int FPU_gettag(int regnr)
+{
+  return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
+}
+
+
+void FPU_settag0(int tag)
+{
+  int regnr = top;
+  regnr &= 7;
+  fpu_tag_word &= ~(3 << (regnr*2));
+  fpu_tag_word |= (tag & 3) << (regnr*2);
+}
+
+
+void FPU_settagi(int stnr, int tag)
+{
+  int regnr = stnr+top;
+  regnr &= 7;
+  fpu_tag_word &= ~(3 << (regnr*2));
+  fpu_tag_word |= (tag & 3) << (regnr*2);
+}
+
+
+void FPU_settag(int regnr, int tag)
+{
+  regnr &= 7;
+  fpu_tag_word &= ~(3 << (regnr*2));
+  fpu_tag_word |= (tag & 3) << (regnr*2);
+}
+
+
+int FPU_Special(FPU_REG const *ptr)
+{
+  int exp = exponent(ptr);
+
+  if ( exp == EXP_BIAS+EXP_UNDER )
+    return TW_Denormal;
+  else if ( exp != EXP_BIAS+EXP_OVER )
+    return TW_NaN;
+  else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
+    return TW_Infinity;
+  return TW_NaN;
+}
+
+
+int isNaN(FPU_REG const *ptr)
+{
+  return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
+	   && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
+}
+
+
+int FPU_empty_i(int stnr)
+{
+  int regnr = (top+stnr) & 7;
+
+  return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
+}
+
+
+int FPU_stackoverflow(FPU_REG **st_new_ptr)
+{
+  *st_new_ptr = &st(-1);
+
+  return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
+}
+
+
+void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
+{
+  reg_copy(r, &st(stnr));
+  FPU_settagi(stnr, tag);
+}
+
+void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
+{
+  reg_copy(r, &st(1));
+  FPU_settagi(1, tag);
+}
+
+void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
+{
+  int regnr = top;
+  regnr &= 7;
+
+  reg_copy(r, &st(0));
+
+  fpu_tag_word &= ~(3 << (regnr*2));
+  fpu_tag_word |= (tag & 3) << (regnr*2);
+}
diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c
new file mode 100644
index 00000000000..403cbde1d42
--- /dev/null
+++ b/arch/i386/math-emu/fpu_trig.c
@@ -0,0 +1,1845 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_trig.c                                                               |
+ |                                                                           |
+ | Implementation of the FPU "transcendental" functions.                     |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997,1999                                    |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@melbpc.org.au            |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "reg_constant.h"	
+
+static void rem_kernel(unsigned long long st0, unsigned long long *y,
+		       unsigned long long st1,
+		       unsigned long long q, int n);
+
+#define BETTER_THAN_486
+
+#define FCOS  4
+
+/* Used only by fptan, fsin, fcos, and fsincos. */
+/* This routine produces very accurate results, similar to
+   using a value of pi with more than 128 bits precision. */
+/* Limited measurements show no results worse than 64 bit precision
+   except for the results for arguments close to 2^63, where the
+   precision of the result sometimes degrades to about 63.9 bits */
+static int trig_arg(FPU_REG *st0_ptr, int even)
+{
+  FPU_REG tmp;
+  u_char tmptag;
+  unsigned long long q;
+  int old_cw = control_word, saved_status = partial_status;
+  int tag, st0_tag = TAG_Valid;
+
+  if ( exponent(st0_ptr) >= 63 )
+    {
+      partial_status |= SW_C2;     /* Reduction incomplete. */
+      return -1;
+    }
+
+  control_word &= ~CW_RC;
+  control_word |= RC_CHOP;
+
+  setpositive(st0_ptr);
+  tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
+		  SIGN_POS);
+
+  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't overflow
+				   to 2^64 */
+  q = significand(&tmp);
+  if ( q )
+    {
+      rem_kernel(significand(st0_ptr),
+		 &significand(&tmp),
+		 significand(&CONST_PI2),
+		 q, exponent(st0_ptr) - exponent(&CONST_PI2));
+      setexponent16(&tmp, exponent(&CONST_PI2));
+      st0_tag = FPU_normalize(&tmp);
+      FPU_copy_to_reg0(&tmp, st0_tag);
+    }
+
+  if ( (even && !(q & 1)) || (!even && (q & 1)) )
+    {
+      st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION);
+
+#ifdef BETTER_THAN_486
+      /* So far, the results are exact but based upon a 64 bit
+	 precision approximation to pi/2. The technique used
+	 now is equivalent to using an approximation to pi/2 which
+	 is accurate to about 128 bits. */
+      if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) )
+	{
+	  /* This code gives the effect of having pi/2 to better than
+	     128 bits precision. */
+
+	  significand(&tmp) = q + 1;
+	  setexponent16(&tmp, 63);
+	  FPU_normalize(&tmp);
+	  tmptag =
+	    FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS,
+		      exponent(&CONST_PI2extra) + exponent(&tmp));
+	  setsign(&tmp, getsign(&CONST_PI2extra));
+	  st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
+	  if ( signnegative(st0_ptr) )
+	    {
+	      /* CONST_PI2extra is negative, so the result of the addition
+		 can be negative. This means that the argument is actually
+		 in a different quadrant. The correction is always < pi/2,
+		 so it can't overflow into yet another quadrant. */
+	      setpositive(st0_ptr);
+	      q++;
+	    }
+	}
+#endif /* BETTER_THAN_486 */
+    }
+#ifdef BETTER_THAN_486
+  else
+    {
+      /* So far, the results are exact but based upon a 64 bit
+	 precision approximation to pi/2. The technique used
+	 now is equivalent to using an approximation to pi/2 which
+	 is accurate to about 128 bits. */
+      if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
+	   || (q > 1) )
+	{
+	  /* This code gives the effect of having p/2 to better than
+	     128 bits precision. */
+
+	  significand(&tmp) = q;
+	  setexponent16(&tmp, 63);
+	  FPU_normalize(&tmp);         /* This must return TAG_Valid */
+	  tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION,
+			     SIGN_POS,
+			     exponent(&CONST_PI2extra) + exponent(&tmp));
+	  setsign(&tmp, getsign(&CONST_PI2extra));
+	  st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp,
+			    FULL_PRECISION);
+	  if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) &&
+	      ((st0_ptr->sigh > CONST_PI2.sigh)
+	       || ((st0_ptr->sigh == CONST_PI2.sigh)
+		   && (st0_ptr->sigl > CONST_PI2.sigl))) )
+	    {
+	      /* CONST_PI2extra is negative, so the result of the
+		 subtraction can be larger than pi/2. This means
+		 that the argument is actually in a different quadrant.
+		 The correction is always < pi/2, so it can't overflow
+		 into yet another quadrant. */
+	      st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2,
+				FULL_PRECISION);
+	      q++;
+	    }
+	}
+    }
+#endif /* BETTER_THAN_486 */
+
+  FPU_settag0(st0_tag);
+  control_word = old_cw;
+  partial_status = saved_status & ~SW_C2;     /* Reduction complete. */
+
+  return (q & 3) | even;
+}
+
+
+/* Convert a long to register */
+static void convert_l2reg(long const *arg, int deststnr)
+{
+  int tag;
+  long num = *arg;
+  u_char sign;
+  FPU_REG *dest = &st(deststnr);
+
+  if (num == 0)
+    {
+      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+      return;
+    }
+
+  if (num > 0)
+    { sign = SIGN_POS; }
+  else
+    { num = -num; sign = SIGN_NEG; }
+
+  dest->sigh = num;
+  dest->sigl = 0;
+  setexponent16(dest, 31);
+  tag = FPU_normalize(dest);
+  FPU_settagi(deststnr, tag);
+  setsign(dest, sign);
+  return;
+}
+
+
+static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  if ( st0_tag == TAG_Empty )
+    FPU_stack_underflow();  /* Puts a QNaN in st(0) */
+  else if ( st0_tag == TW_NaN )
+    real_1op_NaN(st0_ptr);       /* return with a NaN in st(0) */
+#ifdef PARANOID
+  else
+    EXCEPTION(EX_INTERNAL|0x0112);
+#endif /* PARANOID */
+}
+
+
+static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  int isNaN;
+
+  switch ( st0_tag )
+    {
+    case TW_NaN:
+      isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000);
+      if ( isNaN && !(st0_ptr->sigh & 0x40000000) )   /* Signaling ? */
+	{
+	  EXCEPTION(EX_Invalid);
+	  if ( control_word & CW_Invalid )
+	    {
+	      /* The masked response */
+	      /* Convert to a QNaN */
+	      st0_ptr->sigh |= 0x40000000;
+	      push();
+	      FPU_copy_to_reg0(st0_ptr, TAG_Special);
+	    }
+	}
+      else if ( isNaN )
+	{
+	  /* A QNaN */
+	  push();
+	  FPU_copy_to_reg0(st0_ptr, TAG_Special);
+	}
+      else
+	{
+	  /* pseudoNaN or other unsupported */
+	  EXCEPTION(EX_Invalid);
+	  if ( control_word & CW_Invalid )
+	    {
+	      /* The masked response */
+	      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+	      push();
+	      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+	    }
+	}
+      break;              /* return with a NaN in st(0) */
+#ifdef PARANOID
+    default:
+      EXCEPTION(EX_INTERNAL|0x0112);
+#endif /* PARANOID */
+    }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static void f2xm1(FPU_REG *st0_ptr, u_char tag)
+{
+  FPU_REG a;
+
+  clear_C1();
+
+  if ( tag == TAG_Valid )
+    {
+      /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
+      if ( exponent(st0_ptr) < 0 )
+	{
+	denormal_arg:
+
+	  FPU_to_exp16(st0_ptr, &a);
+
+	  /* poly_2xm1(x) requires 0 < st(0) < 1. */
+	  poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
+	}
+      set_precision_flag_up();   /* 80486 appears to always do this */
+      return;
+    }
+
+  if ( tag == TAG_Zero )
+    return;
+
+  if ( tag == TAG_Special )
+    tag = FPU_Special(st0_ptr);
+
+  switch ( tag )
+    {
+    case TW_Denormal:
+      if ( denormal_operand() < 0 )
+	return;
+      goto denormal_arg;
+    case TW_Infinity:
+      if ( signnegative(st0_ptr) )
+	{
+	  /* -infinity gives -1 (p16-10) */
+	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+	  setnegative(st0_ptr);
+	}
+      return;
+    default:
+      single_arg_error(st0_ptr, tag);
+    }
+}
+
+
+static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st_new_ptr;
+  int q;
+  u_char arg_sign = getsign(st0_ptr);
+
+  /* Stack underflow has higher priority */
+  if ( st0_tag == TAG_Empty )
+    {
+      FPU_stack_underflow();  /* Puts a QNaN in st(0) */
+      if ( control_word & CW_Invalid )
+	{
+	  st_new_ptr = &st(-1);
+	  push();
+	  FPU_stack_underflow();  /* Puts a QNaN in the new st(0) */
+	}
+      return;
+    }
+
+  if ( STACK_OVERFLOW )
+    { FPU_stack_overflow(); return; }
+
+  if ( st0_tag == TAG_Valid )
+    {
+      if ( exponent(st0_ptr) > -40 )
+	{
+	  if ( (q = trig_arg(st0_ptr, 0)) == -1 )
+	    {
+	      /* Operand is out of range */
+	      return;
+	    }
+
+	  poly_tan(st0_ptr);
+	  setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
+	  set_precision_flag_up();  /* We do not really know if up or down */
+	}
+      else
+	{
+	  /* For a small arg, the result == the argument */
+	  /* Underflow may happen */
+
+	denormal_arg:
+
+	  FPU_to_exp16(st0_ptr, st0_ptr);
+      
+	  st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+	  FPU_settag0(st0_tag);
+	}
+      push();
+      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+      return;
+    }
+
+  if ( st0_tag == TAG_Zero )
+    {
+      push();
+      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+      setcc(0);
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+
+  if ( st0_tag == TW_Denormal )
+    {
+      if ( denormal_operand() < 0 )
+	return;
+
+      goto denormal_arg;
+    }
+
+  if ( st0_tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      if ( arith_invalid(0) >= 0 )
+	{
+	  st_new_ptr = &st(-1);
+	  push();
+	  arith_invalid(0);
+	}
+      return;
+    }
+
+  single_arg_2_error(st0_ptr, st0_tag);
+}
+
+
+static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st_new_ptr;
+  u_char sign;
+  register FPU_REG *st1_ptr = st0_ptr;  /* anticipate */
+
+  if ( STACK_OVERFLOW )
+    {  FPU_stack_overflow(); return; }
+
+  clear_C1();
+
+  if ( st0_tag == TAG_Valid )
+    {
+      long e;
+
+      push();
+      sign = getsign(st1_ptr);
+      reg_copy(st1_ptr, st_new_ptr);
+      setexponent16(st_new_ptr, exponent(st_new_ptr));
+
+    denormal_arg:
+
+      e = exponent16(st_new_ptr);
+      convert_l2reg(&e, 1);
+      setexponentpos(st_new_ptr, 0);
+      setsign(st_new_ptr, sign);
+      FPU_settag0(TAG_Valid);       /* Needed if arg was a denormal */
+      return;
+    }
+  else if ( st0_tag == TAG_Zero )
+    {
+      sign = getsign(st0_ptr);
+
+      if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 )
+	return;
+
+      push();
+      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+      setsign(st_new_ptr, sign);
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+
+  if ( st0_tag == TW_Denormal )
+    {
+      if (denormal_operand() < 0 )
+	return;
+
+      push();
+      sign = getsign(st1_ptr);
+      FPU_to_exp16(st1_ptr, st_new_ptr);
+      goto denormal_arg;
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      sign = getsign(st0_ptr);
+      setpositive(st0_ptr);
+      push();
+      FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+      setsign(st_new_ptr, sign);
+      return;
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( real_1op_NaN(st0_ptr) < 0 )
+	return;
+
+      push();
+      FPU_copy_to_reg0(st0_ptr, TAG_Special);
+      return;
+    }
+  else if ( st0_tag == TAG_Empty )
+    {
+      /* Is this the correct behaviour? */
+      if ( control_word & EX_Invalid )
+	{
+	  FPU_stack_underflow();
+	  push();
+	  FPU_stack_underflow();
+	}
+      else
+	EXCEPTION(EX_StackUnder);
+    }
+#ifdef PARANOID
+  else
+    EXCEPTION(EX_INTERNAL | 0x119);
+#endif /* PARANOID */
+}
+
+
+static void fdecstp(void)
+{
+  clear_C1();
+  top--;
+}
+
+static void fincstp(void)
+{
+  clear_C1();
+  top++;
+}
+
+
+static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  int expon;
+
+  clear_C1();
+
+  if ( st0_tag == TAG_Valid )
+    {
+      u_char tag;
+      
+      if (signnegative(st0_ptr))
+	{
+	  arith_invalid(0);  /* sqrt(negative) is invalid */
+	  return;
+	}
+
+      /* make st(0) in  [1.0 .. 4.0) */
+      expon = exponent(st0_ptr);
+
+    denormal_arg:
+
+      setexponent16(st0_ptr, (expon & 1));
+
+      /* Do the computation, the sign of the result will be positive. */
+      tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
+      addexponent(st0_ptr, expon >> 1);
+      FPU_settag0(tag);
+      return;
+    }
+
+  if ( st0_tag == TAG_Zero )
+    return;
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+
+  if ( st0_tag == TW_Infinity )
+    {
+      if ( signnegative(st0_ptr) )
+	arith_invalid(0);  /* sqrt(-Infinity) is invalid */
+      return;
+    }
+  else if ( st0_tag == TW_Denormal )
+    {
+      if (signnegative(st0_ptr))
+	{
+	  arith_invalid(0);  /* sqrt(negative) is invalid */
+	  return;
+	}
+
+      if ( denormal_operand() < 0 )
+	return;
+
+      FPU_to_exp16(st0_ptr, st0_ptr);
+
+      expon = exponent16(st0_ptr);
+
+      goto denormal_arg;
+    }
+
+  single_arg_error(st0_ptr, st0_tag);
+
+}
+
+
+static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  int flags, tag;
+
+  if ( st0_tag == TAG_Valid )
+    {
+      u_char sign;
+
+    denormal_arg:
+
+      sign = getsign(st0_ptr);
+
+      if (exponent(st0_ptr) > 63)
+	return;
+
+      if ( st0_tag == TW_Denormal )
+	{
+	  if (denormal_operand() < 0 )
+	    return;
+	}
+
+      /* Fortunately, this can't overflow to 2^64 */
+      if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) )
+	set_precision_flag(flags);
+
+      setexponent16(st0_ptr, 63);
+      tag = FPU_normalize(st0_ptr);
+      setsign(st0_ptr, sign);
+      FPU_settag0(tag);
+      return;
+    }
+
+  if ( st0_tag == TAG_Zero )
+    return;
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+
+  if ( st0_tag == TW_Denormal )
+    goto denormal_arg;
+  else if ( st0_tag == TW_Infinity )
+    return;
+  else
+    single_arg_error(st0_ptr, st0_tag);
+}
+
+
+static int fsin(FPU_REG *st0_ptr, u_char tag)
+{
+  u_char arg_sign = getsign(st0_ptr);
+
+  if ( tag == TAG_Valid )
+    {
+      int q;
+
+      if ( exponent(st0_ptr) > -40 )
+	{
+	  if ( (q = trig_arg(st0_ptr, 0)) == -1 )
+	    {
+	      /* Operand is out of range */
+	      return 1;
+	    }
+
+	  poly_sine(st0_ptr);
+	  
+	  if (q & 2)
+	    changesign(st0_ptr);
+
+	  setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
+
+	  /* We do not really know if up or down */
+	  set_precision_flag_up();
+	  return 0;
+	}
+      else
+	{
+	  /* For a small arg, the result == the argument */
+	  set_precision_flag_up();  /* Must be up. */
+	  return 0;
+	}
+    }
+
+  if ( tag == TAG_Zero )
+    {
+      setcc(0);
+      return 0;
+    }
+
+  if ( tag == TAG_Special )
+    tag = FPU_Special(st0_ptr);
+
+  if ( tag == TW_Denormal )
+    {
+      if ( denormal_operand() < 0 )
+	return 1;
+
+      /* For a small arg, the result == the argument */
+      /* Underflow may happen */
+      FPU_to_exp16(st0_ptr, st0_ptr);
+      
+      tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+
+      FPU_settag0(tag);
+
+      return 0;
+    }
+  else if ( tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      arith_invalid(0);
+      return 1;
+    }
+  else
+    {
+      single_arg_error(st0_ptr, tag);
+      return 1;
+    }
+}
+
+
+static int f_cos(FPU_REG *st0_ptr, u_char tag)
+{
+  u_char st0_sign;
+
+  st0_sign = getsign(st0_ptr);
+
+  if ( tag == TAG_Valid )
+    {
+      int q;
+
+      if ( exponent(st0_ptr) > -40 )
+	{
+	  if ( (exponent(st0_ptr) < 0)
+	      || ((exponent(st0_ptr) == 0)
+		  && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) )
+	    {
+	      poly_cos(st0_ptr);
+
+	      /* We do not really know if up or down */
+	      set_precision_flag_down();
+	  
+	      return 0;
+	    }
+	  else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 )
+	    {
+	      poly_sine(st0_ptr);
+
+	      if ((q+1) & 2)
+		changesign(st0_ptr);
+
+	      /* We do not really know if up or down */
+	      set_precision_flag_down();
+	  
+	      return 0;
+	    }
+	  else
+	    {
+	      /* Operand is out of range */
+	      return 1;
+	    }
+	}
+      else
+	{
+	denormal_arg:
+
+	  setcc(0);
+	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+#ifdef PECULIAR_486
+	  set_precision_flag_down();  /* 80486 appears to do this. */
+#else
+	  set_precision_flag_up();  /* Must be up. */
+#endif /* PECULIAR_486 */
+	  return 0;
+	}
+    }
+  else if ( tag == TAG_Zero )
+    {
+      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+      setcc(0);
+      return 0;
+    }
+
+  if ( tag == TAG_Special )
+    tag = FPU_Special(st0_ptr);
+
+  if ( tag == TW_Denormal )
+    {
+      if ( denormal_operand() < 0 )
+	return 1;
+
+      goto denormal_arg;
+    }
+  else if ( tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      arith_invalid(0);
+      return 1;
+    }
+  else
+    {
+      single_arg_error(st0_ptr, tag);  /* requires st0_ptr == &st(0) */
+      return 1;
+    }
+}
+
+
+static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  f_cos(st0_ptr, st0_tag);
+}
+
+
+static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st_new_ptr;
+  FPU_REG arg;
+  u_char tag;
+
+  /* Stack underflow has higher priority */
+  if ( st0_tag == TAG_Empty )
+    {
+      FPU_stack_underflow();  /* Puts a QNaN in st(0) */
+      if ( control_word & CW_Invalid )
+	{
+	  st_new_ptr = &st(-1);
+	  push();
+	  FPU_stack_underflow();  /* Puts a QNaN in the new st(0) */
+	}
+      return;
+    }
+
+  if ( STACK_OVERFLOW )
+    { FPU_stack_overflow(); return; }
+
+  if ( st0_tag == TAG_Special )
+    tag = FPU_Special(st0_ptr);
+  else
+    tag = st0_tag;
+
+  if ( tag == TW_NaN )
+    {
+      single_arg_2_error(st0_ptr, TW_NaN);
+      return;
+    }
+  else if ( tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      if ( arith_invalid(0) >= 0 )
+	{
+	  /* Masked response */
+	  push();
+	  arith_invalid(0);
+	}
+      return;
+    }
+
+  reg_copy(st0_ptr, &arg);
+  if ( !fsin(st0_ptr, st0_tag) )
+    {
+      push();
+      FPU_copy_to_reg0(&arg, st0_tag);
+      f_cos(&st(0), st0_tag);
+    }
+  else
+    {
+      /* An error, so restore st(0) */
+      FPU_copy_to_reg0(&arg, st0_tag);
+    }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* The following all require two arguments: st(0) and st(1) */
+
+/* A lean, mean kernel for the fprem instructions. This relies upon
+   the division and rounding to an integer in do_fprem giving an
+   exact result. Because of this, rem_kernel() needs to deal only with
+   the least significant 64 bits, the more significant bits of the
+   result must be zero.
+ */
+static void rem_kernel(unsigned long long st0, unsigned long long *y,
+		       unsigned long long st1,
+		       unsigned long long q, int n)
+{
+  int dummy;
+  unsigned long long x;
+
+  x = st0 << n;
+
+  /* Do the required multiplication and subtraction in the one operation */
+
+  /* lsw x -= lsw st1 * lsw q */
+  asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1"
+		:"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]),
+		"=a" (dummy)
+		:"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0])
+		:"%dx");
+  /* msw x -= msw st1 * lsw q */
+  asm volatile ("mull %3; subl %%eax,%0"
+		:"=m" (((unsigned *)&x)[1]), "=a" (dummy)
+		:"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0])
+		:"%dx");
+  /* msw x -= lsw st1 * msw q */
+  asm volatile ("mull %3; subl %%eax,%0"
+		:"=m" (((unsigned *)&x)[1]), "=a" (dummy)
+		:"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1])
+		:"%dx");
+
+  *y = x;
+}
+
+
+/* Remainder of st(0) / st(1) */
+/* This routine produces exact results, i.e. there is never any
+   rounding or truncation, etc of the result. */
+static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
+{
+  FPU_REG *st1_ptr = &st(1);
+  u_char st1_tag = FPU_gettagi(1);
+
+  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
+    {
+      FPU_REG tmp, st0, st1;
+      u_char st0_sign, st1_sign;
+      u_char tmptag;
+      int tag;
+      int old_cw;
+      int expdif;
+      long long q;
+      unsigned short saved_status;
+      int cc;
+
+    fprem_valid:
+      /* Convert registers for internal use. */
+      st0_sign = FPU_to_exp16(st0_ptr, &st0);
+      st1_sign = FPU_to_exp16(st1_ptr, &st1);
+      expdif = exponent16(&st0) - exponent16(&st1);
+
+      old_cw = control_word;
+      cc = 0;
+
+      /* We want the status following the denorm tests, but don't want
+	 the status changed by the arithmetic operations. */
+      saved_status = partial_status;
+      control_word &= ~CW_RC;
+      control_word |= RC_CHOP;
+
+      if ( expdif < 64 )
+	{
+	  /* This should be the most common case */
+
+	  if ( expdif > -2 )
+	    {
+	      u_char sign = st0_sign ^ st1_sign;
+	      tag = FPU_u_div(&st0, &st1, &tmp,
+			      PR_64_BITS | RC_CHOP | 0x3f,
+			      sign);
+	      setsign(&tmp, sign);
+
+	      if ( exponent(&tmp) >= 0 )
+		{
+		  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't
+						   overflow to 2^64 */
+		  q = significand(&tmp);
+
+		  rem_kernel(significand(&st0),
+			     &significand(&tmp),
+			     significand(&st1),
+			     q, expdif);
+
+		  setexponent16(&tmp, exponent16(&st1));
+		}
+	      else
+		{
+		  reg_copy(&st0, &tmp);
+		  q = 0;
+		}
+
+	      if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
+		{
+		  /* We may need to subtract st(1) once more,
+		     to get a result <= 1/2 of st(1). */
+		  unsigned long long x;
+		  expdif = exponent16(&st1) - exponent16(&tmp);
+		  if ( expdif <= 1 )
+		    {
+		      if ( expdif == 0 )
+			x = significand(&st1) - significand(&tmp);
+		      else /* expdif is 1 */
+			x = (significand(&st1) << 1) - significand(&tmp);
+		      if ( (x < significand(&tmp)) ||
+			  /* or equi-distant (from 0 & st(1)) and q is odd */
+			  ((x == significand(&tmp)) && (q & 1) ) )
+			{
+			  st0_sign = ! st0_sign;
+			  significand(&tmp) = x;
+			  q++;
+			}
+		    }
+		}
+
+	      if (q & 4) cc |= SW_C0;
+	      if (q & 2) cc |= SW_C3;
+	      if (q & 1) cc |= SW_C1;
+	    }
+	  else
+	    {
+	      control_word = old_cw;
+	      setcc(0);
+	      return;
+	    }
+	}
+      else
+	{
+	  /* There is a large exponent difference ( >= 64 ) */
+	  /* To make much sense, the code in this section should
+	     be done at high precision. */
+	  int exp_1, N;
+	  u_char sign;
+
+	  /* prevent overflow here */
+	  /* N is 'a number between 32 and 63' (p26-113) */
+	  reg_copy(&st0, &tmp);
+	  tmptag = st0_tag;
+	  N = (expdif & 0x0000001f) + 32;  /* This choice gives results
+					      identical to an AMD 486 */
+	  setexponent16(&tmp, N);
+	  exp_1 = exponent16(&st1);
+	  setexponent16(&st1, 0);
+	  expdif -= N;
+
+	  sign = getsign(&tmp) ^ st1_sign;
+	  tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
+			  sign);
+	  setsign(&tmp, sign);
+
+	  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't
+					   overflow to 2^64 */
+
+	  rem_kernel(significand(&st0),
+		     &significand(&tmp),
+		     significand(&st1),
+		     significand(&tmp),
+		     exponent(&tmp)
+		     ); 
+	  setexponent16(&tmp, exp_1 + expdif);
+
+	  /* It is possible for the operation to be complete here.
+	     What does the IEEE standard say? The Intel 80486 manual
+	     implies that the operation will never be completed at this
+	     point, and the behaviour of a real 80486 confirms this.
+	   */
+	  if ( !(tmp.sigh | tmp.sigl) )
+	    {
+	      /* The result is zero */
+	      control_word = old_cw;
+	      partial_status = saved_status;
+	      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+	      setsign(&st0, st0_sign);
+#ifdef PECULIAR_486
+	      setcc(SW_C2);
+#else
+	      setcc(0);
+#endif /* PECULIAR_486 */
+	      return;
+	    }
+	  cc = SW_C2;
+	}
+
+      control_word = old_cw;
+      partial_status = saved_status;
+      tag = FPU_normalize_nuo(&tmp);
+      reg_copy(&tmp, st0_ptr);
+
+      /* The only condition to be looked for is underflow,
+	 and it can occur here only if underflow is unmasked. */
+      if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
+	  && !(control_word & CW_Underflow) )
+	{
+	  setcc(cc);
+	  tag = arith_underflow(st0_ptr);
+	  setsign(st0_ptr, st0_sign);
+	  FPU_settag0(tag);
+	  return;
+	}
+      else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) )
+	{
+	  stdexp(st0_ptr);
+	  setsign(st0_ptr, st0_sign);
+	}
+      else
+	{
+	  tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
+	}
+      FPU_settag0(tag);
+      setcc(cc);
+
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+  if ( st1_tag == TAG_Special )
+    st1_tag = FPU_Special(st1_ptr);
+
+  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
+    {
+      if ( denormal_operand() < 0 )
+	return;
+      goto fprem_valid;
+    }
+  else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
+    {
+      FPU_stack_underflow();
+      return;
+    }
+  else if ( st0_tag == TAG_Zero )
+    {
+      if ( st1_tag == TAG_Valid )
+	{
+	  setcc(0); return;
+	}
+      else if ( st1_tag == TW_Denormal )
+	{
+	  if ( denormal_operand() < 0 )
+	    return;
+	  setcc(0); return;
+	}
+      else if ( st1_tag == TAG_Zero )
+	{ arith_invalid(0); return; } /* fprem(?,0) always invalid */
+      else if ( st1_tag == TW_Infinity )
+	{ setcc(0); return; }
+    }
+  else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
+    {
+      if ( st1_tag == TAG_Zero )
+	{
+	  arith_invalid(0); /* fprem(Valid,Zero) is invalid */
+	  return;
+	}
+      else if ( st1_tag != TW_NaN )
+	{
+	  if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal))
+	       && (denormal_operand() < 0) )
+	    return;
+
+	  if ( st1_tag == TW_Infinity )
+	    {
+	      /* fprem(Valid,Infinity) is o.k. */
+	      setcc(0); return;
+	    }
+	}
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( st1_tag != TW_NaN )
+	{
+	  arith_invalid(0); /* fprem(Infinity,?) is invalid */
+	  return;
+	}
+    }
+
+  /* One of the registers must contain a NaN if we got here. */
+
+#ifdef PARANOID
+  if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
+      EXCEPTION(EX_INTERNAL | 0x118);
+#endif /* PARANOID */
+
+  real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
+
+}
+
+
+/* ST(1) <- ST(1) * log ST;  pop ST */
+static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st1_ptr = &st(1), exponent;
+  u_char st1_tag = FPU_gettagi(1);
+  u_char sign;
+  int e, tag;
+
+  clear_C1();
+
+  if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
+    {
+    both_valid:
+      /* Both regs are Valid or Denormal */
+      if ( signpositive(st0_ptr) )
+	{
+	  if ( st0_tag == TW_Denormal )
+	    FPU_to_exp16(st0_ptr, st0_ptr);
+	  else
+	    /* Convert st(0) for internal use. */
+	    setexponent16(st0_ptr, exponent(st0_ptr));
+
+	  if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
+	    {
+	      /* Special case. The result can be precise. */
+	      u_char esign;
+	      e = exponent16(st0_ptr);
+	      if ( e >= 0 )
+		{
+		  exponent.sigh = e;
+		  esign = SIGN_POS;
+		}
+	      else
+		{
+		  exponent.sigh = -e;
+		  esign = SIGN_NEG;
+		}
+	      exponent.sigl = 0;
+	      setexponent16(&exponent, 31);
+	      tag = FPU_normalize_nuo(&exponent);
+	      stdexp(&exponent);
+	      setsign(&exponent, esign);
+	      tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
+	      if ( tag >= 0 )
+		FPU_settagi(1, tag);
+	    }
+	  else
+	    {
+	      /* The usual case */
+	      sign = getsign(st1_ptr);
+	      if ( st1_tag == TW_Denormal )
+		FPU_to_exp16(st1_ptr, st1_ptr);
+	      else
+		/* Convert st(1) for internal use. */
+		setexponent16(st1_ptr, exponent(st1_ptr));
+	      poly_l2(st0_ptr, st1_ptr, sign);
+	    }
+	}
+      else
+	{
+	  /* negative */
+	  if ( arith_invalid(1) < 0 )
+	    return;
+	}
+
+      FPU_pop();
+
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+  if ( st1_tag == TAG_Special )
+    st1_tag = FPU_Special(st1_ptr);
+
+  if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
+    {
+      FPU_stack_underflow_pop(1);
+      return;
+    }
+  else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
+    {
+      if ( st0_tag == TAG_Zero )
+	{
+	  if ( st1_tag == TAG_Zero )
+	    {
+	      /* Both args zero is invalid */
+	      if ( arith_invalid(1) < 0 )
+		return;
+	    }
+	  else
+	    {
+	      u_char sign;
+	      sign = getsign(st1_ptr)^SIGN_NEG;
+	      if ( FPU_divide_by_zero(1, sign) < 0 )
+		return;
+
+	      setsign(st1_ptr, sign);
+	    }
+	}
+      else if ( st1_tag == TAG_Zero )
+	{
+	  /* st(1) contains zero, st(0) valid <> 0 */
+	  /* Zero is the valid answer */
+	  sign = getsign(st1_ptr);
+	  
+	  if ( signnegative(st0_ptr) )
+	    {
+	      /* log(negative) */
+	      if ( arith_invalid(1) < 0 )
+		return;
+	    }
+	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+	  else
+	    {
+	      if ( exponent(st0_ptr) < 0 )
+		sign ^= SIGN_NEG;
+
+	      FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+	      setsign(st1_ptr, sign);
+	    }
+	}
+      else
+	{
+	  /* One or both operands are denormals. */
+	  if ( denormal_operand() < 0 )
+	    return;
+	  goto both_valid;
+	}
+    }
+  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
+    {
+      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
+	return;
+    }
+  /* One or both arg must be an infinity */
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
+	{
+	  /* log(-infinity) or 0*log(infinity) */
+	  if ( arith_invalid(1) < 0 )
+	    return;
+	}
+      else
+	{
+	  u_char sign = getsign(st1_ptr);
+
+	  if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+
+	  FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+	  setsign(st1_ptr, sign);
+	}
+    }
+  /* st(1) must be infinity here */
+  else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
+	    && ( signpositive(st0_ptr) ) )
+    {
+      if ( exponent(st0_ptr) >= 0 )
+	{
+	  if ( (exponent(st0_ptr) == 0) &&
+	      (st0_ptr->sigh == 0x80000000) &&
+	      (st0_ptr->sigl == 0) )
+	    {
+	      /* st(0) holds 1.0 */
+	      /* infinity*log(1) */
+	      if ( arith_invalid(1) < 0 )
+		return;
+	    }
+	  /* else st(0) is positive and > 1.0 */
+	}
+      else
+	{
+	  /* st(0) is positive and < 1.0 */
+
+	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+
+	  changesign(st1_ptr);
+	}
+    }
+  else
+    {
+      /* st(0) must be zero or negative */
+      if ( st0_tag == TAG_Zero )
+	{
+	  /* This should be invalid, but a real 80486 is happy with it. */
+
+#ifndef PECULIAR_486
+	  sign = getsign(st1_ptr);
+	  if ( FPU_divide_by_zero(1, sign) < 0 )
+	    return;
+#endif /* PECULIAR_486 */
+
+	  changesign(st1_ptr);
+	}
+      else if ( arith_invalid(1) < 0 )	  /* log(negative) */
+	return;
+    }
+
+  FPU_pop();
+}
+
+
+static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st1_ptr = &st(1);
+  u_char st1_tag = FPU_gettagi(1);
+  int tag;
+
+  clear_C1();
+  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
+    {
+    valid_atan:
+
+      poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
+
+      FPU_pop();
+
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+  if ( st1_tag == TAG_Special )
+    st1_tag = FPU_Special(st1_ptr);
+
+  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
+    {
+      if ( denormal_operand() < 0 )
+	return;
+
+      goto valid_atan;
+    }
+  else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
+    {
+      FPU_stack_underflow_pop(1);
+      return;
+    }
+  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
+    {
+      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
+	  FPU_pop();
+      return;
+    }
+  else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
+    {
+      u_char sign = getsign(st1_ptr);
+      if ( st0_tag == TW_Infinity )
+	{
+	  if ( st1_tag == TW_Infinity )
+	    {
+	      if ( signpositive(st0_ptr) )
+		{
+		  FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
+		}
+	      else
+		{
+		  setpositive(st1_ptr);
+		  tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
+				  FULL_PRECISION, SIGN_POS,
+				  exponent(&CONST_PI4), exponent(&CONST_PI2));
+		  if ( tag >= 0 )
+		    FPU_settagi(1, tag);
+		}
+	    }
+	  else
+	    {
+	      if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+		return;
+
+	      if ( signpositive(st0_ptr) )
+		{
+		  FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+		  setsign(st1_ptr, sign);   /* An 80486 preserves the sign */
+		  FPU_pop();
+		  return;
+		}
+	      else
+		{
+		  FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+		}
+	    }
+	}
+      else
+	{
+	  /* st(1) is infinity, st(0) not infinity */
+	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+
+	  FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
+	}
+      setsign(st1_ptr, sign);
+    }
+  else if ( st1_tag == TAG_Zero )
+    {
+      /* st(0) must be valid or zero */
+      u_char sign = getsign(st1_ptr);
+
+      if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	return;
+
+      if ( signpositive(st0_ptr) )
+	{
+	  /* An 80486 preserves the sign */
+	  FPU_pop();
+	  return;
+	}
+
+      FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+      setsign(st1_ptr, sign);
+    }
+  else if ( st0_tag == TAG_Zero )
+    {
+      /* st(1) must be TAG_Valid here */
+      u_char sign = getsign(st1_ptr);
+
+      if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+	return;
+
+      FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
+      setsign(st1_ptr, sign);
+    }
+#ifdef PARANOID
+  else
+    EXCEPTION(EX_INTERNAL | 0x125);
+#endif /* PARANOID */
+
+  FPU_pop();
+  set_precision_flag_up();  /* We do not really know if up or down */
+}
+
+
+static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  do_fprem(st0_ptr, st0_tag, RC_CHOP);
+}
+
+
+static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  do_fprem(st0_ptr, st0_tag, RC_RND);
+}
+
+
+static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  u_char sign, sign1;
+  FPU_REG *st1_ptr = &st(1), a, b;
+  u_char st1_tag = FPU_gettagi(1);
+
+  clear_C1();
+  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
+    {
+    valid_yl2xp1:
+
+      sign = getsign(st0_ptr);
+      sign1 = getsign(st1_ptr);
+
+      FPU_to_exp16(st0_ptr, &a);
+      FPU_to_exp16(st1_ptr, &b);
+
+      if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
+	return;
+
+      FPU_pop();
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+  if ( st1_tag == TAG_Special )
+    st1_tag = FPU_Special(st1_ptr);
+
+  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
+    {
+      if ( denormal_operand() < 0 )
+	return;
+
+      goto valid_yl2xp1;
+    }
+  else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) )
+    {
+      FPU_stack_underflow_pop(1);
+      return;
+    }
+  else if ( st0_tag == TAG_Zero )
+    {
+      switch ( st1_tag )
+	{
+	case TW_Denormal:
+	  if ( denormal_operand() < 0 )
+	    return;
+
+	case TAG_Zero:
+	case TAG_Valid:
+	  setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
+	  FPU_copy_to_reg1(st0_ptr, st0_tag);
+	  break;
+
+	case TW_Infinity:
+	  /* Infinity*log(1) */
+	  if ( arith_invalid(1) < 0 )
+	    return;
+	  break;
+
+	case TW_NaN:
+	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
+	    return;
+	  break;
+
+	default:
+#ifdef PARANOID
+	  EXCEPTION(EX_INTERNAL | 0x116);
+	  return;
+#endif /* PARANOID */
+	  break;
+	}
+    }
+  else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
+    {
+      switch ( st1_tag )
+	{
+	case TAG_Zero:
+	  if ( signnegative(st0_ptr) )
+	    {
+	      if ( exponent(st0_ptr) >= 0 )
+		{
+		  /* st(0) holds <= -1.0 */
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+		  changesign(st1_ptr);
+#else
+		  if ( arith_invalid(1) < 0 )
+		    return;
+#endif /* PECULIAR_486 */
+		}
+	      else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+		return;
+	      else
+		changesign(st1_ptr);
+	    }
+	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+	  break;
+
+	case TW_Infinity:
+	  if ( signnegative(st0_ptr) )
+	    {
+	      if ( (exponent(st0_ptr) >= 0) &&
+		  !((st0_ptr->sigh == 0x80000000) &&
+		    (st0_ptr->sigl == 0)) )
+		{
+		  /* st(0) holds < -1.0 */
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+		  changesign(st1_ptr);
+#else
+		  if ( arith_invalid(1) < 0 ) return;
+#endif /* PECULIAR_486 */
+		}
+	      else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+		return;
+	      else
+		changesign(st1_ptr);
+	    }
+	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+	  break;
+
+	case TW_NaN:
+	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
+	    return;
+	}
+
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
+	return;
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( st1_tag == TW_NaN )
+	{
+	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
+	    return;
+	}
+      else if ( signnegative(st0_ptr) )
+	{
+#ifndef PECULIAR_486
+	  /* This should have higher priority than denormals, but... */
+	  if ( arith_invalid(1) < 0 )  /* log(-infinity) */
+	    return;
+#endif /* PECULIAR_486 */
+	  if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+#ifdef PECULIAR_486
+	  /* Denormal operands actually get higher priority */
+	  if ( arith_invalid(1) < 0 )  /* log(-infinity) */
+	    return;
+#endif /* PECULIAR_486 */
+	}
+      else if ( st1_tag == TAG_Zero )
+	{
+	  /* log(infinity) */
+	  if ( arith_invalid(1) < 0 )
+	    return;
+	}
+	
+      /* st(1) must be valid here. */
+
+      else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+	return;
+
+      /* The Manual says that log(Infinity) is invalid, but a real
+	 80486 sensibly says that it is o.k. */
+      else
+	{
+	  u_char sign = getsign(st1_ptr);
+	  FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+	  setsign(st1_ptr, sign);
+	}
+    }
+#ifdef PARANOID
+  else
+    {
+      EXCEPTION(EX_INTERNAL | 0x117);
+      return;
+    }
+#endif /* PARANOID */
+
+  FPU_pop();
+  return;
+
+}
+
+
+static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
+{
+  FPU_REG *st1_ptr = &st(1);
+  u_char st1_tag = FPU_gettagi(1);
+  int old_cw = control_word;
+  u_char sign = getsign(st0_ptr);
+
+  clear_C1();
+  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
+    {
+      long scale;
+      FPU_REG tmp;
+
+      /* Convert register for internal use. */
+      setexponent16(st0_ptr, exponent(st0_ptr));
+
+    valid_scale:
+
+      if ( exponent(st1_ptr) > 30 )
+	{
+	  /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
+
+	  if ( signpositive(st1_ptr) )
+	    {
+	      EXCEPTION(EX_Overflow);
+	      FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+	    }
+	  else
+	    {
+	      EXCEPTION(EX_Underflow);
+	      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+	    }
+	  setsign(st0_ptr, sign);
+	  return;
+	}
+
+      control_word &= ~CW_RC;
+      control_word |= RC_CHOP;
+      reg_copy(st1_ptr, &tmp);
+      FPU_round_to_int(&tmp, st1_tag);      /* This can never overflow here */
+      control_word = old_cw;
+      scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
+      scale += exponent16(st0_ptr);
+
+      setexponent16(st0_ptr, scale);
+
+      /* Use FPU_round() to properly detect under/overflow etc */
+      FPU_round(st0_ptr, 0, 0, control_word, sign);
+
+      return;
+    }
+
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+  if ( st1_tag == TAG_Special )
+    st1_tag = FPU_Special(st1_ptr);
+
+  if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
+    {
+      switch ( st1_tag )
+	{
+	case TAG_Valid:
+	  /* st(0) must be a denormal */
+	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+
+	  FPU_to_exp16(st0_ptr, st0_ptr);  /* Will not be left on stack */
+	  goto valid_scale;
+
+	case TAG_Zero:
+	  if ( st0_tag == TW_Denormal )
+	    denormal_operand();
+	  return;
+
+	case TW_Denormal:
+	  denormal_operand();
+	  return;
+
+	case TW_Infinity:
+	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
+	    return;
+
+	  if ( signpositive(st1_ptr) )
+	    FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+	  else
+	    FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+	  setsign(st0_ptr, sign);
+	  return;
+
+	case TW_NaN:
+	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+	  return;
+	}
+    }
+  else if ( st0_tag == TAG_Zero )
+    {
+      switch ( st1_tag )
+	{
+	case TAG_Valid:
+	case TAG_Zero:
+	  return;
+
+	case TW_Denormal:
+	  denormal_operand();
+	  return;
+
+	case TW_Infinity:
+	  if ( signpositive(st1_ptr) )
+	    arith_invalid(0); /* Zero scaled by +Infinity */
+	  return;
+
+	case TW_NaN:
+	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+	  return;
+	}
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      switch ( st1_tag )
+	{
+	case TAG_Valid:
+	case TAG_Zero:
+	  return;
+
+	case TW_Denormal:
+	  denormal_operand();
+	  return;
+
+	case TW_Infinity:
+	  if ( signnegative(st1_ptr) )
+	    arith_invalid(0); /* Infinity scaled by -Infinity */
+	  return;
+
+	case TW_NaN:
+	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+	  return;
+	}
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( st1_tag != TAG_Empty )
+	{ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
+    }
+
+#ifdef PARANOID
+  if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
+    {
+      EXCEPTION(EX_INTERNAL | 0x115);
+      return;
+    }
+#endif
+
+  /* At least one of st(0), st(1) must be empty */
+  FPU_stack_underflow();
+
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static FUNC_ST0 const trig_table_a[] = {
+  f2xm1, fyl2x, fptan, fpatan,
+  fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp
+};
+
+void FPU_triga(void)
+{
+  (trig_table_a[FPU_rm])(&st(0), FPU_gettag0());
+}
+
+
+static FUNC_ST0 const trig_table_b[] =
+  {
+    fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos
+  };
+
+void FPU_trigb(void)
+{
+  (trig_table_b[FPU_rm])(&st(0), FPU_gettag0());
+}
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
new file mode 100644
index 00000000000..91175738e94
--- /dev/null
+++ b/arch/i386/math-emu/get_address.c
@@ -0,0 +1,449 @@
+/*---------------------------------------------------------------------------+
+ |  get_address.c                                                            |
+ |                                                                           |
+ | Get the effective address from an FPU instruction.                        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@suburbia.net             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+
+#include <linux/stddef.h>
+
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+
+
+#define FPU_WRITE_BIT 0x10
+
+static int reg_offset[] = {
+	offsetof(struct info,___eax),
+	offsetof(struct info,___ecx),
+	offsetof(struct info,___edx),
+	offsetof(struct info,___ebx),
+	offsetof(struct info,___esp),
+	offsetof(struct info,___ebp),
+	offsetof(struct info,___esi),
+	offsetof(struct info,___edi)
+};
+
+#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
+
+static int reg_offset_vm86[] = {
+	offsetof(struct info,___cs),
+	offsetof(struct info,___vm86_ds),
+	offsetof(struct info,___vm86_es),
+	offsetof(struct info,___vm86_fs),
+	offsetof(struct info,___vm86_gs),
+	offsetof(struct info,___ss),
+	offsetof(struct info,___vm86_ds)
+      };
+
+#define VM86_REG_(x) (*(unsigned short *) \
+		      (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
+
+/* These are dummy, fs and gs are not saved on the stack. */
+#define ___FS ___ds
+#define ___GS ___ds
+
+static int reg_offset_pm[] = {
+	offsetof(struct info,___cs),
+	offsetof(struct info,___ds),
+	offsetof(struct info,___es),
+	offsetof(struct info,___FS),
+	offsetof(struct info,___GS),
+	offsetof(struct info,___ss),
+	offsetof(struct info,___ds)
+      };
+
+#define PM_REG_(x) (*(unsigned short *) \
+		      (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
+
+
+/* Decode the SIB byte. This function assumes mod != 0 */
+static int sib(int mod, unsigned long *fpu_eip)
+{
+  u_char ss,index,base;
+  long offset;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_access_ok(1);
+  FPU_get_user(base, (u_char __user *) (*fpu_eip));   /* The SIB byte */
+  RE_ENTRANT_CHECK_ON;
+  (*fpu_eip)++;
+  ss = base >> 6;
+  index = (base >> 3) & 7;
+  base &= 7;
+
+  if ((mod == 0) && (base == 5))
+    offset = 0;              /* No base register */
+  else
+    offset = REG_(base);
+
+  if (index == 4)
+    {
+      /* No index register */
+      /* A non-zero ss is illegal */
+      if ( ss )
+	EXCEPTION(EX_Invalid);
+    }
+  else
+    {
+      offset += (REG_(index)) << ss;
+    }
+
+  if (mod == 1)
+    {
+      /* 8 bit signed displacement */
+      long displacement;
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_access_ok(1);
+      FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
+      offset += displacement;
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip)++;
+    }
+  else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
+    {
+      /* 32 bit displacement */
+      long displacement;
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_access_ok(4);
+      FPU_get_user(displacement, (long __user *) (*fpu_eip));
+      offset += displacement;
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip) += 4;
+    }
+
+  return offset;
+}
+
+
+static unsigned long vm86_segment(u_char segment,
+				  struct address *addr)
+{
+  segment--;
+#ifdef PARANOID
+  if ( segment > PREFIX_SS_ )
+    {
+      EXCEPTION(EX_INTERNAL|0x130);
+      math_abort(FPU_info,SIGSEGV);
+    }
+#endif /* PARANOID */
+  addr->selector = VM86_REG_(segment);
+  return (unsigned long)VM86_REG_(segment) << 4;
+}
+
+
+/* This should work for 16 and 32 bit protected mode. */
+static long pm_address(u_char FPU_modrm, u_char segment,
+		       struct address *addr, long offset)
+{ 
+  struct desc_struct descriptor;
+  unsigned long base_address, limit, address, seg_top;
+  unsigned short selector;
+
+  segment--;
+
+#ifdef PARANOID
+  /* segment is unsigned, so this also detects if segment was 0: */
+  if ( segment > PREFIX_SS_ )
+    {
+      EXCEPTION(EX_INTERNAL|0x132);
+      math_abort(FPU_info,SIGSEGV);
+    }
+#endif /* PARANOID */
+
+  switch ( segment )
+    {
+      /* fs and gs aren't used by the kernel, so they still have their
+	 user-space values. */
+    case PREFIX_FS_-1:
+      /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
+	 in the assembler statement. */
+
+      __asm__("mov %%fs,%0":"=r" (selector));
+      addr->selector = selector;
+      break;
+    case PREFIX_GS_-1:
+      /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
+	 in the assembler statement. */
+      __asm__("mov %%gs,%0":"=r" (selector));
+      addr->selector = selector;
+      break;
+    default:
+      addr->selector = PM_REG_(segment);
+    }
+
+  descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+  base_address = SEG_BASE_ADDR(descriptor);
+  address = base_address + offset;
+  limit = base_address
+	+ (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
+  if ( limit < base_address ) limit = 0xffffffff;
+
+  if ( SEG_EXPAND_DOWN(descriptor) )
+    {
+      if ( SEG_G_BIT(descriptor) )
+	seg_top = 0xffffffff;
+      else
+	{
+	  seg_top = base_address + (1 << 20);
+	  if ( seg_top < base_address ) seg_top = 0xffffffff;
+	}
+      access_limit =
+	(address <= limit) || (address >= seg_top) ? 0 :
+	  ((seg_top-address) >= 255 ? 255 : seg_top-address);
+    }
+  else
+    {
+      access_limit =
+	(address > limit) || (address < base_address) ? 0 :
+	  ((limit-address) >= 254 ? 255 : limit-address+1);
+    }
+  if ( SEG_EXECUTE_ONLY(descriptor) ||
+      (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
+    {
+      access_limit = 0;
+    }
+  return address;
+}
+
+
+/*
+       MOD R/M byte:  MOD == 3 has a special use for the FPU
+                      SIB byte used iff R/M = 100b
+
+       7   6   5   4   3   2   1   0
+       .....   .........   .........
+        MOD    OPCODE(2)     R/M
+
+
+       SIB byte
+
+       7   6   5   4   3   2   1   0
+       .....   .........   .........
+        SS      INDEX        BASE
+
+*/
+
+void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
+		  struct address *addr,
+		  fpu_addr_modes addr_modes)
+{
+  u_char mod;
+  unsigned rm = FPU_modrm & 7;
+  long *cpu_reg_ptr;
+  int address = 0;     /* Initialized just to stop compiler warnings. */
+
+  /* Memory accessed via the cs selector is write protected
+     in `non-segmented' 32 bit protected mode. */
+  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+      && (addr_modes.override.segment == PREFIX_CS_) )
+    {
+      math_abort(FPU_info,SIGSEGV);
+    }
+
+  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
+
+  mod = (FPU_modrm >> 6) & 3;
+
+  if (rm == 4 && mod != 3)
+    {
+      address = sib(mod, fpu_eip);
+    }
+  else
+    {
+      cpu_reg_ptr = & REG_(rm);
+      switch (mod)
+	{
+	case 0:
+	  if (rm == 5)
+	    {
+	      /* Special case: disp32 */
+	      RE_ENTRANT_CHECK_OFF;
+	      FPU_code_access_ok(4);
+	      FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
+	      (*fpu_eip) += 4;
+	      RE_ENTRANT_CHECK_ON;
+	      addr->offset = address;
+	      return (void __user *) address;
+	    }
+	  else
+	    {
+	      address = *cpu_reg_ptr;  /* Just return the contents
+					  of the cpu register */
+	      addr->offset = address;
+	      return (void __user *) address;
+	    }
+	case 1:
+	  /* 8 bit signed displacement */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_access_ok(1);
+	  FPU_get_user(address, (signed char __user *) (*fpu_eip));
+	  RE_ENTRANT_CHECK_ON;
+	  (*fpu_eip)++;
+	  break;
+	case 2:
+	  /* 32 bit displacement */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_access_ok(4);
+	  FPU_get_user(address, (long __user *) (*fpu_eip));
+	  (*fpu_eip) += 4;
+	  RE_ENTRANT_CHECK_ON;
+	  break;
+	case 3:
+	  /* Not legal for the FPU */
+	  EXCEPTION(EX_Invalid);
+	}
+      address += *cpu_reg_ptr;
+    }
+
+  addr->offset = address;
+
+  switch ( addr_modes.default_mode )
+    {
+    case 0:
+      break;
+    case VM86:
+      address += vm86_segment(addr_modes.override.segment, addr);
+      break;
+    case PM16:
+    case SEG32:
+      address = pm_address(FPU_modrm, addr_modes.override.segment,
+			   addr, address);
+      break;
+    default:
+      EXCEPTION(EX_INTERNAL|0x133);
+    }
+
+  return (void __user *)address;
+}
+
+
+void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
+		     struct address *addr,
+		     fpu_addr_modes addr_modes)
+{
+  u_char mod;
+  unsigned rm = FPU_modrm & 7;
+  int address = 0;     /* Default used for mod == 0 */
+
+  /* Memory accessed via the cs selector is write protected
+     in `non-segmented' 32 bit protected mode. */
+  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+      && (addr_modes.override.segment == PREFIX_CS_) )
+    {
+      math_abort(FPU_info,SIGSEGV);
+    }
+
+  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
+
+  mod = (FPU_modrm >> 6) & 3;
+
+  switch (mod)
+    {
+    case 0:
+      if (rm == 6)
+	{
+	  /* Special case: disp16 */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_access_ok(2);
+	  FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
+	  (*fpu_eip) += 2;
+	  RE_ENTRANT_CHECK_ON;
+	  goto add_segment;
+	}
+      break;
+    case 1:
+      /* 8 bit signed displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_access_ok(1);
+      FPU_get_user(address, (signed char __user *) (*fpu_eip));
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip)++;
+      break;
+    case 2:
+      /* 16 bit displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_access_ok(2);
+      FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
+      (*fpu_eip) += 2;
+      RE_ENTRANT_CHECK_ON;
+      break;
+    case 3:
+      /* Not legal for the FPU */
+      EXCEPTION(EX_Invalid);
+      break;
+    }
+  switch ( rm )
+    {
+    case 0:
+      address += FPU_info->___ebx + FPU_info->___esi;
+      break;
+    case 1:
+      address += FPU_info->___ebx + FPU_info->___edi;
+      break;
+    case 2:
+      address += FPU_info->___ebp + FPU_info->___esi;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 3:
+      address += FPU_info->___ebp + FPU_info->___edi;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 4:
+      address += FPU_info->___esi;
+      break;
+    case 5:
+      address += FPU_info->___edi;
+      break;
+    case 6:
+      address += FPU_info->___ebp;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 7:
+      address += FPU_info->___ebx;
+      break;
+    }
+
+ add_segment:
+  address &= 0xffff;
+
+  addr->offset = address;
+
+  switch ( addr_modes.default_mode )
+    {
+    case 0:
+      break;
+    case VM86:
+      address += vm86_segment(addr_modes.override.segment, addr);
+      break;
+    case PM16:
+    case SEG32:
+      address = pm_address(FPU_modrm, addr_modes.override.segment,
+			   addr, address);
+      break;
+    default:
+      EXCEPTION(EX_INTERNAL|0x131);
+    }
+
+  return (void __user *)address ;
+}
diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c
new file mode 100644
index 00000000000..85314be2fef
--- /dev/null
+++ b/arch/i386/math-emu/load_store.c
@@ -0,0 +1,270 @@
+/*---------------------------------------------------------------------------+
+ |  load_store.c                                                             |
+ |                                                                           |
+ | This file contains most of the code to interpret the FPU instructions     |
+ | which load and store from user memory.                                    |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@suburbia.net             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include <asm/uaccess.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+
+
+#define _NONE_ 0   /* st0_ptr etc not needed */
+#define _REG0_ 1   /* Will be storing st(0) */
+#define _PUSH_ 3   /* Need to check for space to push onto stack */
+#define _null_ 4   /* Function illegal or not implemented */
+
+#define pop_0()	{ FPU_settag0(TAG_Empty); top++; }
+
+
+static u_char const type_table[32] = {
+  _PUSH_, _PUSH_, _PUSH_, _PUSH_,
+  _null_, _null_, _null_, _null_,
+  _REG0_, _REG0_, _REG0_, _REG0_,
+  _REG0_, _REG0_, _REG0_, _REG0_,
+  _NONE_, _null_, _NONE_, _PUSH_,
+  _NONE_, _PUSH_, _null_, _PUSH_,
+  _NONE_, _null_, _NONE_, _REG0_,
+  _NONE_, _REG0_, _NONE_, _REG0_
+  };
+
+u_char const data_sizes_16[32] = {
+  4,  4,  8,  2,  0,  0,  0,  0,
+  4,  4,  8,  2,  4,  4,  8,  2,
+  14, 0, 94, 10,  2, 10,  0,  8,  
+  14, 0, 94, 10,  2, 10,  2,  8
+};
+
+static u_char const data_sizes_32[32] = {
+  4,  4,  8,  2,  0,  0,  0,  0,
+  4,  4,  8,  2,  4,  4,  8,  2,
+  28, 0,108, 10,  2, 10,  0,  8,  
+  28, 0,108, 10,  2, 10,  2,  8
+};
+
+int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
+		     void __user *data_address)
+{
+  FPU_REG loaded_data;
+  FPU_REG *st0_ptr;
+  u_char st0_tag = TAG_Empty;  /* This is just to stop a gcc warning. */
+  u_char loaded_tag;
+
+  st0_ptr = NULL;    /* Initialized just to stop compiler warnings. */
+
+  if ( addr_modes.default_mode & PROTECTED )
+    {
+      if ( addr_modes.default_mode == SEG32 )
+	{
+	  if ( access_limit < data_sizes_32[type] )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+      else if ( addr_modes.default_mode == PM16 )
+	{
+	  if ( access_limit < data_sizes_16[type] )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+#ifdef PARANOID
+      else
+	EXCEPTION(EX_INTERNAL|0x140);
+#endif /* PARANOID */
+    }
+
+  switch ( type_table[type] )
+    {
+    case _NONE_:
+      break;
+    case _REG0_:
+      st0_ptr = &st(0);       /* Some of these instructions pop after
+				 storing */
+      st0_tag = FPU_gettag0();
+      break;
+    case _PUSH_:
+      {
+	if ( FPU_gettagi(-1) != TAG_Empty )
+	  { FPU_stack_overflow(); return 0; }
+	top--;
+	st0_ptr = &st(0);
+      }
+      break;
+    case _null_:
+      FPU_illegal();
+      return 0;
+#ifdef PARANOID
+    default:
+      EXCEPTION(EX_INTERNAL|0x141);
+      return 0;
+#endif /* PARANOID */
+    }
+
+  switch ( type )
+    {
+    case 000:       /* fld m32real */
+      clear_C1();
+      loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
+      if ( (loaded_tag == TAG_Special)
+	   && isNaN(&loaded_data)
+	   && (real_1op_NaN(&loaded_data) < 0) )
+	{
+	  top++;
+	  break;
+	}
+      FPU_copy_to_reg0(&loaded_data, loaded_tag);
+      break;
+    case 001:      /* fild m32int */
+      clear_C1();
+      loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
+      FPU_copy_to_reg0(&loaded_data, loaded_tag);
+      break;
+    case 002:      /* fld m64real */
+      clear_C1();
+      loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
+      if ( (loaded_tag == TAG_Special)
+	   && isNaN(&loaded_data)
+	   && (real_1op_NaN(&loaded_data) < 0) )
+	{
+	  top++;
+	  break;
+	}
+      FPU_copy_to_reg0(&loaded_data, loaded_tag);
+      break;
+    case 003:      /* fild m16int */
+      clear_C1();
+      loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
+      FPU_copy_to_reg0(&loaded_data, loaded_tag);
+      break;
+    case 010:      /* fst m32real */
+      clear_C1();
+      FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
+      break;
+    case 011:      /* fist m32int */
+      clear_C1();
+      FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
+      break;
+    case 012:     /* fst m64real */
+      clear_C1();
+      FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
+      break;
+    case 013:     /* fist m16int */
+      clear_C1();
+      FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
+      break;
+    case 014:     /* fstp m32real */
+      clear_C1();
+      if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 015:     /* fistp m32int */
+      clear_C1();
+      if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 016:     /* fstp m64real */
+      clear_C1();
+      if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 017:     /* fistp m16int */
+      clear_C1();
+      if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 020:     /* fldenv  m14/28byte */
+      fldenv(addr_modes, (u_char __user *)data_address);
+      /* Ensure that the values just loaded are not changed by
+	 fix-up operations. */
+      return 1;
+    case 022:     /* frstor m94/108byte */
+      frstor(addr_modes, (u_char __user *)data_address);
+      /* Ensure that the values just loaded are not changed by
+	 fix-up operations. */
+      return 1;
+    case 023:     /* fbld m80dec */
+      clear_C1();
+      loaded_tag = FPU_load_bcd((u_char __user *)data_address);
+      FPU_settag0(loaded_tag);
+      break;
+    case 024:     /* fldcw */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_READ, data_address, 2);
+      FPU_get_user(control_word, (unsigned short __user *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      if ( partial_status & ~control_word & CW_Exceptions )
+	partial_status |= (SW_Summary | SW_Backward);
+      else
+	partial_status &= ~(SW_Summary | SW_Backward);
+#ifdef PECULIAR_486
+      control_word |= 0x40;  /* An 80486 appears to always set this bit */
+#endif /* PECULIAR_486 */
+      return 1;
+    case 025:      /* fld m80real */
+      clear_C1();
+      loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
+      FPU_settag0(loaded_tag);
+      break;
+    case 027:      /* fild m64int */
+      clear_C1();
+      loaded_tag = FPU_load_int64((long long __user *)data_address);
+      FPU_settag0(loaded_tag);
+      break;
+    case 030:     /* fstenv  m14/28byte */
+      fstenv(addr_modes, (u_char __user *)data_address);
+      return 1;
+    case 032:      /* fsave */
+      fsave(addr_modes, (u_char __user *)data_address);
+      return 1;
+    case 033:      /* fbstp m80dec */
+      clear_C1();
+      if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 034:      /* fstcw m16int */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE,data_address,2);
+      FPU_put_user(control_word, (unsigned short __user *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    case 035:      /* fstp m80real */
+      clear_C1();
+      if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 036:      /* fstsw m2byte */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE,data_address,2);
+      FPU_put_user(status_word(),(unsigned short __user *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    case 037:      /* fistp m64int */
+      clear_C1();
+      if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    }
+  return 0;
+}
diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S
new file mode 100644
index 00000000000..717785a53eb
--- /dev/null
+++ b/arch/i386/math-emu/mul_Xsig.S
@@ -0,0 +1,176 @@
+/*---------------------------------------------------------------------------+
+ |  mul_Xsig.S                                                               |
+ |                                                                           |
+ | Multiply a 12 byte fixed point number by another fixed point number.      |
+ |                                                                           |
+ | Copyright (C) 1992,1994,1995                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void mul32_Xsig(Xsig *x, unsigned b)                                    |
+ |                                                                           |
+ |   void mul64_Xsig(Xsig *x, unsigned long long *b)                         |
+ |                                                                           |
+ |   void mul_Xsig_Xsig(Xsig *x, unsigned *b)                                |
+ |                                                                           |
+ | The result is neither rounded nor normalized, and the ls bit or so may    |
+ | be wrong.                                                                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"mul_Xsig.S"
+
+
+#include "fpu_emu.h"
+
+.text
+ENTRY(mul32_Xsig)
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp
+	pushl %esi
+
+	movl PARAM1,%esi
+	movl PARAM2,%ecx
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull %ecx		/* msl of b */
+	movl %edx,-12(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull %ecx		/* msl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull %ecx		/* msl of b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%eax
+	movl %eax,(%esi)
+	movl -8(%ebp),%eax
+	movl %eax,4(%esi)
+	movl -4(%ebp),%eax
+	movl %eax,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
+
+ENTRY(mul64_Xsig)
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp
+	pushl %esi
+
+	movl PARAM1,%esi
+	movl PARAM2,%ecx
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	movl %edx,-12(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %edx,-12(%ebp)
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%eax
+	movl %eax,(%esi)
+	movl -8(%ebp),%eax
+	movl %eax,4(%esi)
+	movl -4(%ebp),%eax
+	movl %eax,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
+
+
+ENTRY(mul_Xsig_Xsig)
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp
+	pushl %esi
+
+	movl PARAM1,%esi
+	movl PARAM2,%ecx
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	movl %edx,-12(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 4(%ecx)		/* midl of b */
+	addl %edx,-12(%ebp)
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %edx,-12(%ebp)
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 4(%ecx)		/* midl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%edx
+	movl %edx,(%esi)
+	movl -8(%ebp),%edx
+	movl %edx,4(%esi)
+	movl -4(%ebp),%edx
+	movl %edx,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h
new file mode 100644
index 00000000000..4db79811492
--- /dev/null
+++ b/arch/i386/math-emu/poly.h
@@ -0,0 +1,121 @@
+/*---------------------------------------------------------------------------+
+ |  poly.h                                                                   |
+ |                                                                           |
+ |  Header file for the FPU-emu poly*.c source files.                        |
+ |                                                                           |
+ | Copyright (C) 1994,1999                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@melbpc.org.au            |
+ |                                                                           |
+ | Declarations and definitions for functions operating on Xsig (12-byte     |
+ | extended-significand) quantities.                                         |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _POLY_H
+#define _POLY_H
+
+/* This 12-byte structure is used to improve the accuracy of computation
+   of transcendental functions.
+   Intended to be used to get results better than 8-byte computation
+   allows. 9-byte would probably be sufficient.
+   */
+typedef struct {
+  unsigned long lsw;
+  unsigned long midw;
+  unsigned long msw;
+} Xsig;
+
+asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
+		      unsigned long long *result);
+asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
+				const unsigned long long terms[], const int n);
+
+asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
+asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
+asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
+
+asmlinkage void shr_Xsig(Xsig *, const int n);
+asmlinkage int round_Xsig(Xsig *);
+asmlinkage int norm_Xsig(Xsig *);
+asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
+
+/* Macro to extract the most significant 32 bits from a long long */
+#define LL_MSW(x)     (((unsigned long *)&x)[1])
+
+/* Macro to initialize an Xsig struct */
+#define MK_XSIG(a,b,c)     { c, b, a }
+
+/* Macro to access the 8 ms bytes of an Xsig as a long long */
+#define XSIG_LL(x)         (*(unsigned long long *)&x.midw)
+
+
+/*
+   Need to run gcc with optimizations on to get these to
+   actually be in-line.
+   */
+
+/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
+   The answer is the ms word of the product. */
+/* Some versions of gcc make it difficult to stop eax from being clobbered.
+   Merely specifying that it is used doesn't work...
+ */
+static inline unsigned long mul_32_32(const unsigned long arg1,
+				      const unsigned long arg2)
+{
+  int retval;
+  asm volatile ("mull %2; movl %%edx,%%eax" \
+		:"=a" (retval) \
+		:"0" (arg1), "g" (arg2) \
+		:"dx");
+  return retval;
+}
+
+
+/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
+static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
+{
+  asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
+                "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
+                "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
+                "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
+                 :"=g" (*dest):"g" (dest), "g" (x2)
+                 :"ax","si","di");
+}
+
+
+/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
+/* Note: the constraints in the asm statement didn't always work properly
+   with gcc 2.5.8.  Changing from using edi to using ecx got around the
+   problem, but keep fingers crossed! */
+static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
+{
+  asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
+                "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
+                "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
+                "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
+                "jnc 0f;\n"
+		"rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
+                "movl %4,%%ecx; incl (%%ecx)\n"
+                "movl $1,%%eax; jmp 1f;\n"
+                "0: xorl %%eax,%%eax;\n"
+                "1:\n"
+		:"=g" (*exp), "=g" (*dest)
+		:"g" (dest), "g" (x2), "g" (exp)
+		:"cx","si","ax");
+}
+
+
+/* Negate (subtract from 1.0) the 12 byte Xsig */
+/* This is faster in a loop on my 386 than using the "neg" instruction. */
+static inline void negate_Xsig(Xsig *x)
+{
+  asm volatile("movl %1,%%esi;\n"
+               "xorl %%ecx,%%ecx;\n"
+               "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
+               "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
+               "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
+               :"=g" (*x):"g" (x):"si","ax","cx");
+}
+
+#endif /* _POLY_H */
diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c
new file mode 100644
index 00000000000..9766ad5e974
--- /dev/null
+++ b/arch/i386/math-emu/poly_2xm1.c
@@ -0,0 +1,156 @@
+/*---------------------------------------------------------------------------+
+ |  poly_2xm1.c                                                              |
+ |                                                                           |
+ | Function to compute 2^x-1 by a polynomial approximation.                  |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+#define	HIPOWER	11
+static const unsigned long long lterms[HIPOWER] =
+{
+  0x0000000000000000LL,  /* This term done separately as 12 bytes */
+  0xf5fdeffc162c7543LL,
+  0x1c6b08d704a0bfa6LL,
+  0x0276556df749cc21LL,
+  0x002bb0ffcf14f6b8LL,
+  0x0002861225ef751cLL,
+  0x00001ffcbfcd5422LL,
+  0x00000162c005d5f1LL,
+  0x0000000da96ccb1bLL,
+  0x0000000078d1b897LL,
+  0x000000000422b029LL
+};
+
+static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
+
+/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
+   These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
+ */
+static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
+static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
+static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
+static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
+
+static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
+				     &shiftterm2, &shiftterm3 };
+
+
+/*--- poly_2xm1() -----------------------------------------------------------+
+ | Requires st(0) which is TAG_Valid and < 1.                                |
+ +---------------------------------------------------------------------------*/
+int	poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
+{
+  long int              exponent, shift;
+  unsigned long long    Xll;
+  Xsig                  accumulator, Denom, argSignif;
+  u_char                tag;
+
+  exponent = exponent16(arg);
+
+#ifdef PARANOID
+  if ( exponent >= 0 )    	/* Don't want a |number| >= 1.0 */
+    {
+      /* Number negative, too large, or not Valid. */
+      EXCEPTION(EX_INTERNAL|0x127);
+      return 1;
+    }
+#endif /* PARANOID */
+
+  argSignif.lsw = 0;
+  XSIG_LL(argSignif) = Xll = significand(arg);
+
+  if ( exponent == -1 )
+    {
+      shift = (argSignif.msw & 0x40000000) ? 3 : 2;
+      /* subtract 0.5 or 0.75 */
+      exponent -= 2;
+      XSIG_LL(argSignif) <<= 2;
+      Xll <<= 2;
+    }
+  else if ( exponent == -2 )
+    {
+      shift = 1;
+      /* subtract 0.25 */
+      exponent--;
+      XSIG_LL(argSignif) <<= 1;
+      Xll <<= 1;
+    }
+  else
+    shift = 0;
+
+  if ( exponent < -2 )
+    {
+      /* Shift the argument right by the required places. */
+      if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
+	Xll++;	/* round up */
+    }
+
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+  polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  shr_Xsig(&accumulator, 3);
+
+  mul_Xsig_Xsig(&argSignif, &hiterm);   /* The leading term */
+  add_two_Xsig(&accumulator, &argSignif, &exponent);
+
+  if ( shift )
+    {
+      /* The argument is large, use the identity:
+	 f(x+a) = f(a) * (f(x) + 1) - 1;
+	 */
+      shr_Xsig(&accumulator, - exponent);
+      accumulator.msw |= 0x80000000;      /* add 1.0 */
+      mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
+      accumulator.msw &= 0x3fffffff;      /* subtract 1.0 */
+      exponent = 1;
+    }
+
+  if ( sign != SIGN_POS )
+    {
+      /* The argument is negative, use the identity:
+	     f(-x) = -f(x) / (1 + f(x))
+	 */
+      Denom.lsw = accumulator.lsw;
+      XSIG_LL(Denom) = XSIG_LL(accumulator);
+      if ( exponent < 0 )
+	shr_Xsig(&Denom, - exponent);
+      else if ( exponent > 0 )
+	{
+	  /* exponent must be 1 here */
+	  XSIG_LL(Denom) <<= 1;
+	  if ( Denom.lsw & 0x80000000 )
+	    XSIG_LL(Denom) |= 1;
+	  (Denom.lsw) <<= 1;
+	}
+      Denom.msw |= 0x80000000;      /* add 1.0 */
+      div_Xsig(&accumulator, &Denom, &accumulator);
+    }
+
+  /* Convert to 64 bit signed-compatible */
+  exponent += round_Xsig(&accumulator);
+
+  result = &st(0);
+  significand(result) = XSIG_LL(accumulator);
+  setexponent16(result, exponent);
+
+  tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
+
+  setsign(result, sign);
+  FPU_settag0(tag);
+
+  return 0;
+
+}
diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c
new file mode 100644
index 00000000000..82f702952f6
--- /dev/null
+++ b/arch/i386/math-emu/poly_atan.c
@@ -0,0 +1,229 @@
+/*---------------------------------------------------------------------------+
+ |  poly_atan.c                                                              |
+ |                                                                           |
+ | Compute the arctan of a FPU_REG, using a polynomial approximation.        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+#define	HIPOWERon	6	/* odd poly, negative terms */
+static const unsigned long long oddnegterms[HIPOWERon] =
+{
+  0x0000000000000000LL, /* Dummy (not for - 1.0) */
+  0x015328437f756467LL,
+  0x0005dda27b73dec6LL,
+  0x0000226bf2bfb91aLL,
+  0x000000ccc439c5f7LL,
+  0x0000000355438407LL
+} ;
+
+#define	HIPOWERop	6	/* odd poly, positive terms */
+static const unsigned long long oddplterms[HIPOWERop] =
+{
+/*  0xaaaaaaaaaaaaaaabLL,  transferred to fixedpterm[] */
+  0x0db55a71875c9ac2LL,
+  0x0029fce2d67880b0LL,
+  0x0000dfd3908b4596LL,
+  0x00000550fd61dab4LL,
+  0x0000001c9422b3f9LL,
+  0x000000003e3301e1LL
+};
+
+static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
+
+static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
+
+static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
+
+
+/*--- poly_atan() -----------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void	poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
+		  FPU_REG *st1_ptr, u_char st1_tag)
+{
+  u_char	transformed, inverted,
+                sign1, sign2;
+  int           exponent;
+  long int   	dummy_exp;
+  Xsig          accumulator, Numer, Denom, accumulatore, argSignif,
+                argSq, argSqSq;
+  u_char        tag;
+  
+  sign1 = getsign(st0_ptr);
+  sign2 = getsign(st1_ptr);
+  if ( st0_tag == TAG_Valid )
+    {
+      exponent = exponent(st0_ptr);
+    }
+  else
+    {
+      /* This gives non-compatible stack contents... */
+      FPU_to_exp16(st0_ptr, st0_ptr);
+      exponent = exponent16(st0_ptr);
+    }
+  if ( st1_tag == TAG_Valid )
+    {
+      exponent -= exponent(st1_ptr);
+    }
+  else
+    {
+      /* This gives non-compatible stack contents... */
+      FPU_to_exp16(st1_ptr, st1_ptr);
+      exponent -= exponent16(st1_ptr);
+    }
+
+  if ( (exponent < 0) || ((exponent == 0) &&
+			  ((st0_ptr->sigh < st1_ptr->sigh) ||
+			   ((st0_ptr->sigh == st1_ptr->sigh) &&
+			    (st0_ptr->sigl < st1_ptr->sigl))) ) )
+    {
+      inverted = 1;
+      Numer.lsw = Denom.lsw = 0;
+      XSIG_LL(Numer) = significand(st0_ptr);
+      XSIG_LL(Denom) = significand(st1_ptr);
+    }
+  else
+    {
+      inverted = 0;
+      exponent = -exponent;
+      Numer.lsw = Denom.lsw = 0;
+      XSIG_LL(Numer) = significand(st1_ptr);
+      XSIG_LL(Denom) = significand(st0_ptr);
+     }
+  div_Xsig(&Numer, &Denom, &argSignif);
+  exponent += norm_Xsig(&argSignif);
+
+  if ( (exponent >= -1)
+      || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
+    {
+      /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
+      /* Convert the argument by an identity for atan */
+      transformed = 1;
+
+      if ( exponent >= 0 )
+	{
+#ifdef PARANOID
+	  if ( !( (exponent == 0) && 
+		 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
+		 (argSignif.msw == 0x80000000) ) )
+	    {
+	      EXCEPTION(EX_INTERNAL|0x104);  /* There must be a logic error */
+	      return;
+	    }
+#endif /* PARANOID */
+	  argSignif.msw = 0;   /* Make the transformed arg -> 0.0 */
+	}
+      else
+	{
+	  Numer.lsw = Denom.lsw = argSignif.lsw;
+	  XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
+
+	  if ( exponent < -1 )
+	    shr_Xsig(&Numer, -1-exponent);
+	  negate_Xsig(&Numer);
+      
+	  shr_Xsig(&Denom, -exponent);
+	  Denom.msw |= 0x80000000;
+      
+	  div_Xsig(&Numer, &Denom, &argSignif);
+
+	  exponent = -1 + norm_Xsig(&argSignif);
+	}
+    }
+  else
+    {
+      transformed = 0;
+    }
+
+  argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
+  argSq.msw = argSignif.msw;
+  mul_Xsig_Xsig(&argSq, &argSq);
+  
+  argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
+  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+  accumulatore.lsw = argSq.lsw;
+  XSIG_LL(accumulatore) = XSIG_LL(argSq);
+
+  shr_Xsig(&argSq, 2*(-1-exponent-1));
+  shr_Xsig(&argSqSq, 4*(-1-exponent-1));
+
+  /* Now have argSq etc with binary point at the left
+     .1xxxxxxxx */
+
+  /* Do the basic fixed point polynomial evaluation */
+  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
+		   oddplterms, HIPOWERop-1);
+  mul64_Xsig(&accumulator, &XSIG_LL(argSq));
+  negate_Xsig(&accumulator);
+  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
+  negate_Xsig(&accumulator);
+  add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
+
+  mul64_Xsig(&accumulatore, &denomterm);
+  shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
+  accumulatore.msw |= 0x80000000;
+
+  div_Xsig(&accumulator, &accumulatore, &accumulator);
+
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  mul_Xsig_Xsig(&accumulator, &argSq);
+
+  shr_Xsig(&accumulator, 3);
+  negate_Xsig(&accumulator);
+  add_Xsig_Xsig(&accumulator, &argSignif);
+
+  if ( transformed )
+    {
+      /* compute pi/4 - accumulator */
+      shr_Xsig(&accumulator, -1-exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = -1;
+    }
+
+  if ( inverted )
+    {
+      /* compute pi/2 - accumulator */
+      shr_Xsig(&accumulator, -exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = 0;
+    }
+
+  if ( sign1 )
+    {
+      /* compute pi - accumulator */
+      shr_Xsig(&accumulator, 1 - exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = 1;
+    }
+
+  exponent += round_Xsig(&accumulator);
+
+  significand(st1_ptr) = XSIG_LL(accumulator);
+  setexponent16(st1_ptr, exponent);
+
+  tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
+  FPU_settagi(1, tag);
+
+  set_precision_flag_up();  /* We do not really know if up or down,
+			       use this as the default. */
+
+}
diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c
new file mode 100644
index 00000000000..dd00e1d5b07
--- /dev/null
+++ b/arch/i386/math-emu/poly_l2.c
@@ -0,0 +1,272 @@
+/*---------------------------------------------------------------------------+
+ |  poly_l2.c                                                                |
+ |                                                                           |
+ | Compute the base 2 log of a FPU_REG, using a polynomial approximation.    |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+static void log2_kernel(FPU_REG const *arg, u_char argsign,
+			Xsig *accum_result, long int *expon);
+
+
+/*--- poly_l2() -------------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ +---------------------------------------------------------------------------*/
+void	poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
+{
+  long int	       exponent, expon, expon_expon;
+  Xsig                 accumulator, expon_accum, yaccum;
+  u_char		       sign, argsign;
+  FPU_REG              x;
+  int                  tag;
+
+  exponent = exponent16(st0_ptr);
+
+  /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
+  if ( st0_ptr->sigh > (unsigned)0xb504f334 )
+    {
+      /* Treat as  sqrt(2)/2 < st0_ptr < 1 */
+      significand(&x) = - significand(st0_ptr);
+      setexponent16(&x, -1);
+      exponent++;
+      argsign = SIGN_NEG;
+    }
+  else
+    {
+      /* Treat as  1 <= st0_ptr < sqrt(2) */
+      x.sigh = st0_ptr->sigh - 0x80000000;
+      x.sigl = st0_ptr->sigl;
+      setexponent16(&x, 0);
+      argsign = SIGN_POS;
+    }
+  tag = FPU_normalize_nuo(&x);
+
+  if ( tag == TAG_Zero )
+    {
+      expon = 0;
+      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+    }
+  else
+    {
+      log2_kernel(&x, argsign, &accumulator, &expon);
+    }
+
+  if ( exponent < 0 )
+    {
+      sign = SIGN_NEG;
+      exponent = -exponent;
+    }
+  else
+    sign = SIGN_POS;
+  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
+  if ( exponent )
+    {
+      expon_expon = 31 + norm_Xsig(&expon_accum);
+      shr_Xsig(&accumulator, expon_expon - expon);
+
+      if ( sign ^ argsign )
+	negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &expon_accum);
+    }
+  else
+    {
+      expon_expon = expon;
+      sign = argsign;
+    }
+
+  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
+  mul_Xsig_Xsig(&accumulator, &yaccum);
+
+  expon_expon += round_Xsig(&accumulator);
+
+  if ( accumulator.msw == 0 )
+    {
+      FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+      return;
+    }
+
+  significand(st1_ptr) = XSIG_LL(accumulator);
+  setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
+
+  tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
+  FPU_settagi(1, tag);
+
+  set_precision_flag_up();  /* 80486 appears to always do this */
+
+  return;
+
+}
+
+
+/*--- poly_l2p1() -----------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ |   log2(x+1)                                                               |
+ +---------------------------------------------------------------------------*/
+int	poly_l2p1(u_char sign0, u_char sign1,
+		  FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
+{
+  u_char             	tag;
+  long int        	exponent;
+  Xsig              	accumulator, yaccum;
+
+  if ( exponent16(st0_ptr) < 0 )
+    {
+      log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
+
+      yaccum.lsw = 0;
+      XSIG_LL(yaccum) = significand(st1_ptr);
+      mul_Xsig_Xsig(&accumulator, &yaccum);
+
+      exponent += round_Xsig(&accumulator);
+
+      exponent += exponent16(st1_ptr) + 1;
+      if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
+
+      significand(dest) = XSIG_LL(accumulator);
+      setexponent16(dest, exponent);
+
+      tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
+      FPU_settagi(1, tag);
+
+      if ( tag == TAG_Valid )
+	set_precision_flag_up();   /* 80486 appears to always do this */
+    }
+  else
+    {
+      /* The magnitude of st0_ptr is far too large. */
+
+      if ( sign0 != SIGN_POS )
+	{
+	  /* Trying to get the log of a negative number. */
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+	  changesign(st1_ptr);
+#else
+	  if ( arith_invalid(1) < 0 )
+	    return 1;
+#endif /* PECULIAR_486 */
+	}
+
+      /* 80486 appears to do this */
+      if ( sign0 == SIGN_NEG )
+	set_precision_flag_down();
+      else
+	set_precision_flag_up();
+    }
+
+  if ( exponent(dest) <= EXP_UNDER )
+    EXCEPTION(EX_Underflow);
+
+  return 0;
+
+}
+
+
+
+
+#undef HIPOWER
+#define	HIPOWER	10
+static const unsigned long long logterms[HIPOWER] =
+{
+  0x2a8eca5705fc2ef0LL,
+  0xf6384ee1d01febceLL,
+  0x093bb62877cdf642LL,
+  0x006985d8a9ec439bLL,
+  0x0005212c4f55a9c8LL,
+  0x00004326a16927f0LL,
+  0x0000038d1d80a0e7LL,
+  0x0000003141cc80c6LL,
+  0x00000002b1668c9fLL,
+  0x000000002c7a46aaLL
+};
+
+static const unsigned long leadterm = 0xb8000000;
+
+
+/*--- log2_kernel() ---------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ |   log2(x+1)                                                               |
+ +---------------------------------------------------------------------------*/
+static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
+			long int *expon)
+{
+  long int             exponent, adj;
+  unsigned long long   Xsq;
+  Xsig                 accumulator, Numer, Denom, argSignif, arg_signif;
+
+  exponent = exponent16(arg);
+  Numer.lsw = Denom.lsw = 0;
+  XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
+  if ( argsign == SIGN_POS )
+    {
+      shr_Xsig(&Denom, 2 - (1 + exponent));
+      Denom.msw |= 0x80000000;
+      div_Xsig(&Numer, &Denom, &argSignif);
+    }
+  else
+    {
+      shr_Xsig(&Denom, 1 - (1 + exponent));
+      negate_Xsig(&Denom);
+      if ( Denom.msw & 0x80000000 )
+	{
+	  div_Xsig(&Numer, &Denom, &argSignif);
+	  exponent ++;
+	}
+      else
+	{
+	  /* Denom must be 1.0 */
+	  argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
+	  argSignif.msw = Numer.msw;
+	}
+    }
+
+#ifndef PECULIAR_486
+  /* Should check here that  |local_arg|  is within the valid range */
+  if ( exponent >= -2 )
+    {
+      if ( (exponent > -2) ||
+	  (argSignif.msw > (unsigned)0xafb0ccc0) )
+	{
+	  /* The argument is too large */
+	}
+    }
+#endif /* PECULIAR_486 */
+
+  arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
+  adj = norm_Xsig(&argSignif);
+  accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
+  mul_Xsig_Xsig(&accumulator, &accumulator);
+  shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
+  Xsq = XSIG_LL(accumulator);
+  if ( accumulator.lsw & 0x80000000 )
+    Xsq++;
+
+  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+  /* Do the basic fixed point polynomial evaluation */
+  polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
+
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  shr_Xsig(&accumulator, 6 - adj);
+
+  mul32_Xsig(&arg_signif, leadterm);
+  add_two_Xsig(&accumulator, &arg_signif, &exponent);
+
+  *expon = exponent + 1;
+  accum_result->lsw = accumulator.lsw;
+  accum_result->midw = accumulator.midw;
+  accum_result->msw = accumulator.msw;
+
+}
diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c
new file mode 100644
index 00000000000..a36313fb06f
--- /dev/null
+++ b/arch/i386/math-emu/poly_sin.c
@@ -0,0 +1,397 @@
+/*---------------------------------------------------------------------------+
+ |  poly_sin.c                                                               |
+ |                                                                           |
+ |  Computation of an approximation of the sin function and the cosine       |
+ |  function by a polynomial.                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997,1999                                    |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@melbpc.org.au                             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+#define	N_COEFF_P	4
+#define	N_COEFF_N	4
+
+static const unsigned long long pos_terms_l[N_COEFF_P] =
+{
+  0xaaaaaaaaaaaaaaabLL,
+  0x00d00d00d00cf906LL,
+  0x000006b99159a8bbLL,
+  0x000000000d7392e6LL
+};
+
+static const unsigned long long neg_terms_l[N_COEFF_N] =
+{
+  0x2222222222222167LL,
+  0x0002e3bc74aab624LL,
+  0x0000000b09229062LL,
+  0x00000000000c7973LL
+};
+
+
+
+#define	N_COEFF_PH	4
+#define	N_COEFF_NH	4
+static const unsigned long long pos_terms_h[N_COEFF_PH] =
+{
+  0x0000000000000000LL,
+  0x05b05b05b05b0406LL,
+  0x000049f93edd91a9LL,
+  0x00000000c9c9ed62LL
+};
+
+static const unsigned long long neg_terms_h[N_COEFF_NH] =
+{
+  0xaaaaaaaaaaaaaa98LL,
+  0x001a01a01a019064LL,
+  0x0000008f76c68a77LL,
+  0x0000000000d58f5eLL
+};
+
+
+/*--- poly_sine() -----------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void	poly_sine(FPU_REG *st0_ptr)
+{
+  int                 exponent, echange;
+  Xsig                accumulator, argSqrd, argTo4;
+  unsigned long       fix_up, adj;
+  unsigned long long  fixed_arg;
+  FPU_REG	      result;
+
+  exponent = exponent(st0_ptr);
+
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+
+  /* Split into two ranges, for arguments below and above 1.0 */
+  /* The boundary between upper and lower is approx 0.88309101259 */
+  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
+    {
+      /* The argument is <= 0.88309101259 */
+
+      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(st0_ptr));
+      shr_Xsig(&argSqrd, 2*(-1-exponent));
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+		      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+		      N_COEFF_P-1);
+
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent;
+
+      /* The minimum exponent difference is 3 */
+      shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
+
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += significand(st0_ptr);
+
+      echange = round_Xsig(&accumulator);
+
+      setexponentpos(&result, exponent(st0_ptr) + echange);
+    }
+  else
+    {
+      /* The argument is > 0.88309101259 */
+      /* We use sin(st(0)) = cos(pi/2-st(0)) */
+
+      fixed_arg = significand(st0_ptr);
+
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+
+	  /* Put the binary point at the left. */
+	  fixed_arg <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+      /* There is a special case which arises due to rounding, to fix here. */
+      if ( fixed_arg == 0xffffffffffffffffLL )
+	fixed_arg = 0;
+
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+
+      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+		      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+		      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+
+      shr_Xsig(&accumulator, 1);
+
+      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
+      negate_Xsig(&accumulator);
+
+      /* The basic computation is complete. Now fix the answer to
+	 compensate for the error due to the approximation used for
+	 pi/2
+	 */
+
+      /* This has an exponent of -65 */
+      fix_up = 0x898cc517;
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+	{
+	  /* Get about 32 bit precision in these: */
+	  fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
+	}
+      fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
+
+      adj = accumulator.lsw;    /* temp save */
+      accumulator.lsw -= fix_up;
+      if ( accumulator.lsw > adj )
+	XSIG_LL(accumulator) --;
+
+      echange = round_Xsig(&accumulator);
+
+      setexponentpos(&result, echange - 1);
+    }
+
+  significand(&result) = XSIG_LL(accumulator);
+  setsign(&result, getsign(st0_ptr));
+  FPU_copy_to_reg0(&result, TAG_Valid);
+
+#ifdef PARANOID
+  if ( (exponent(&result) >= 0)
+      && (significand(&result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x150);
+    }
+#endif /* PARANOID */
+
+}
+
+
+
+/*--- poly_cos() ------------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void	poly_cos(FPU_REG *st0_ptr)
+{
+  FPU_REG	      result;
+  long int            exponent, exp2, echange;
+  Xsig                accumulator, argSqrd, fix_up, argTo4;
+  unsigned long long  fixed_arg;
+
+#ifdef PARANOID
+  if ( (exponent(st0_ptr) > 0)
+      || ((exponent(st0_ptr) == 0)
+	  && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
+    {
+      EXCEPTION(EX_Invalid);
+      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+      return;
+    }
+#endif /* PARANOID */
+
+  exponent = exponent(st0_ptr);
+
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+
+  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
+    {
+      /* arg is < 0.687705 */
+
+      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
+      argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(st0_ptr));
+
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  shr_Xsig(&argSqrd, 2*(-1-exponent));
+	}
+
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+		      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+		      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      mul64_Xsig(&accumulator, &significand(st0_ptr));
+      shr_Xsig(&accumulator, -2*(1+exponent));
+
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+
+      shr_Xsig(&accumulator, 1);
+
+      /* It doesn't matter if accumulator is all zero here, the
+	 following code will work ok */
+      negate_Xsig(&accumulator);
+
+      if ( accumulator.lsw & 0x80000000 )
+	XSIG_LL(accumulator) ++;
+      if ( accumulator.msw == 0 )
+	{
+	  /* The result is 1.0 */
+	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+	  return;
+	}
+      else
+	{
+	  significand(&result) = XSIG_LL(accumulator);
+      
+	  /* will be a valid positive nr with expon = -1 */
+	  setexponentpos(&result, -1);
+	}
+    }
+  else
+    {
+      fixed_arg = significand(st0_ptr);
+
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+
+	  /* Put the binary point at the left. */
+	  fixed_arg <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+      /* There is a special case which arises due to rounding, to fix here. */
+      if ( fixed_arg == 0xffffffffffffffffLL )
+	fixed_arg = 0;
+
+      exponent = -1;
+      exp2 = -1;
+
+      /* A shift is needed here only for a narrow range of arguments,
+	 i.e. for fixed_arg approx 2^-32, but we pick up more... */
+      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
+	{
+	  fixed_arg <<= 16;
+	  exponent -= 16;
+	  exp2 -= 16;
+	}
+
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  shr_Xsig(&argSqrd, 2*(-1-exponent));
+	}
+
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+		      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+		      N_COEFF_P-1);
+
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent;
+
+      /* The minimum exponent difference is 3 */
+      shr_Xsig(&accumulator, exp2 - exponent);
+
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += fixed_arg;
+
+      /* The basic computation is complete. Now fix the answer to
+	 compensate for the error due to the approximation used for
+	 pi/2
+	 */
+
+      /* This has an exponent of -65 */
+      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
+      fix_up.lsw = 0;
+
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+	{
+	  /* Get about 32 bit precision in these: */
+	  fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
+	  fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
+	}
+
+      exp2 += norm_Xsig(&accumulator);
+      shr_Xsig(&accumulator, 1); /* Prevent overflow */
+      exp2++;
+      shr_Xsig(&fix_up, 65 + exp2);
+
+      add_Xsig_Xsig(&accumulator, &fix_up);
+
+      echange = round_Xsig(&accumulator);
+
+      setexponentpos(&result, exp2 + echange);
+      significand(&result) = XSIG_LL(accumulator);
+    }
+
+  FPU_copy_to_reg0(&result, TAG_Valid);
+
+#ifdef PARANOID
+  if ( (exponent(&result) >= 0)
+      && (significand(&result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x151);
+    }
+#endif /* PARANOID */
+
+}
diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c
new file mode 100644
index 00000000000..8df3e03b6e6
--- /dev/null
+++ b/arch/i386/math-emu/poly_tan.c
@@ -0,0 +1,222 @@
+/*---------------------------------------------------------------------------+
+ |  poly_tan.c                                                               |
+ |                                                                           |
+ | Compute the tan of a FPU_REG, using a polynomial approximation.           |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997,1999                                    |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@melbpc.org.au            |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+#define	HiPOWERop	3	/* odd poly, positive terms */
+static const unsigned long long oddplterm[HiPOWERop] =
+{
+  0x0000000000000000LL,
+  0x0051a1cf08fca228LL,
+  0x0000000071284ff7LL
+};
+
+#define	HiPOWERon	2	/* odd poly, negative terms */
+static const unsigned long long oddnegterm[HiPOWERon] =
+{
+   0x1291a9a184244e80LL,
+   0x0000583245819c21LL
+};
+
+#define	HiPOWERep	2	/* even poly, positive terms */
+static const unsigned long long evenplterm[HiPOWERep] =
+{
+  0x0e848884b539e888LL,
+  0x00003c7f18b887daLL
+};
+
+#define	HiPOWERen	2	/* even poly, negative terms */
+static const unsigned long long evennegterm[HiPOWERen] =
+{
+  0xf1f0200fd51569ccLL,
+  0x003afb46105c4432LL
+};
+
+static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
+
+
+/*--- poly_tan() ------------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+void	poly_tan(FPU_REG *st0_ptr)
+{
+  long int    		exponent;
+  int                   invert;
+  Xsig                  argSq, argSqSq, accumulatoro, accumulatore, accum,
+                        argSignif, fix_up;
+  unsigned long         adj;
+
+  exponent = exponent(st0_ptr);
+
+#ifdef PARANOID
+  if ( signnegative(st0_ptr) )	/* Can't hack a number < 0.0 */
+    { arith_invalid(0); return; }  /* Need a positive number */
+#endif /* PARANOID */
+
+  /* Split the problem into two domains, smaller and larger than pi/4 */
+  if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
+    {
+      /* The argument is greater than (approx) pi/4 */
+      invert = 1;
+      accum.lsw = 0;
+      XSIG_LL(accum) = significand(st0_ptr);
+ 
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+	  /* Put the binary point at the left. */
+	  XSIG_LL(accum) <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
+      /* This is a special case which arises due to rounding. */
+      if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
+	{
+	  FPU_settag0(TAG_Valid);
+	  significand(st0_ptr) = 0x8a51e04daabda360LL;
+	  setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
+	  return;
+	}
+
+      argSignif.lsw = accum.lsw;
+      XSIG_LL(argSignif) = XSIG_LL(accum);
+      exponent = -1 + norm_Xsig(&argSignif);
+    }
+  else
+    {
+      invert = 0;
+      argSignif.lsw = 0;
+      XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
+ 
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
+	    XSIG_LL(accum) ++;	/* round up */
+	}
+    }
+
+  XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
+  mul_Xsig_Xsig(&argSq, &argSq);
+  XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
+  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+  /* Compute the negative terms for the numerator polynomial */
+  accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
+  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
+  mul_Xsig_Xsig(&accumulatoro, &argSq);
+  negate_Xsig(&accumulatoro);
+  /* Add the positive terms */
+  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
+
+  
+  /* Compute the positive terms for the denominator polynomial */
+  accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
+  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
+  mul_Xsig_Xsig(&accumulatore, &argSq);
+  negate_Xsig(&accumulatore);
+  /* Add the negative terms */
+  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
+  /* Multiply by arg^2 */
+  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+  /* de-normalize and divide by 2 */
+  shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
+  negate_Xsig(&accumulatore);      /* This does 1 - accumulator */
+
+  /* Now find the ratio. */
+  if ( accumulatore.msw == 0 )
+    {
+      /* accumulatoro must contain 1.0 here, (actually, 0) but it
+	 really doesn't matter what value we use because it will
+	 have negligible effect in later calculations
+	 */
+      XSIG_LL(accum) = 0x8000000000000000LL;
+      accum.lsw = 0;
+    }
+  else
+    {
+      div_Xsig(&accumulatoro, &accumulatore, &accum);
+    }
+
+  /* Multiply by 1/3 * arg^3 */
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &twothirds);
+  shr_Xsig(&accum, -2*(exponent+1));
+
+  /* tan(arg) = arg + accum */
+  add_two_Xsig(&accum, &argSignif, &exponent);
+
+  if ( invert )
+    {
+      /* We now have the value of tan(pi_2 - arg) where pi_2 is an
+	 approximation for pi/2
+	 */
+      /* The next step is to fix the answer to compensate for the
+	 error due to the approximation used for pi/2
+	 */
+
+      /* This is (approx) delta, the error in our approx for pi/2
+	 (see above). It has an exponent of -65
+	 */
+      XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
+      fix_up.lsw = 0;
+
+      if ( exponent == 0 )
+	adj = 0xffffffff;   /* We want approx 1.0 here, but
+			       this is close enough. */
+      else if ( exponent > -30 )
+	{
+	  adj = accum.msw >> -(exponent+1);      /* tan */
+	  adj = mul_32_32(adj, adj);             /* tan^2 */
+	}
+      else
+	adj = 0;
+      adj = mul_32_32(0x898cc517, adj);          /* delta * tan^2 */
+
+      fix_up.msw += adj;
+      if ( !(fix_up.msw & 0x80000000) )   /* did fix_up overflow ? */
+	{
+	  /* Yes, we need to add an msb */
+	  shr_Xsig(&fix_up, 1);
+	  fix_up.msw |= 0x80000000;
+	  shr_Xsig(&fix_up, 64 + exponent);
+	}
+      else
+	shr_Xsig(&fix_up, 65 + exponent);
+
+      add_two_Xsig(&accum, &fix_up, &exponent);
+
+      /* accum now contains tan(pi/2 - arg).
+	 Use tan(arg) = 1.0 / tan(pi/2 - arg)
+	 */
+      accumulatoro.lsw = accumulatoro.midw = 0;
+      accumulatoro.msw = 0x80000000;
+      div_Xsig(&accumulatoro, &accum, &accum);
+      exponent = - exponent - 1;
+    }
+
+  /* Transfer the result */
+  round_Xsig(&accum);
+  FPU_settag0(TAG_Valid);
+  significand(st0_ptr) = XSIG_LL(accum);
+  setexponent16(st0_ptr, exponent + EXTENDED_Ebias);  /* Result is positive. */
+
+}
diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S
new file mode 100644
index 00000000000..17315c89ff3
--- /dev/null
+++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,135 @@
+/*---------------------------------------------------------------------------+
+ |  polynomial_Xsig.S                                                        |
+ |                                                                           |
+ | Fixed point arithmetic polynomial evaluation.                             |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1995                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
+ |                        unsigned long long terms[], int n)                 |
+ |                                                                           |
+ | Computes:                                                                 |
+ | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
+ | and adds the result to the 12 byte Xsig.                                  |
+ | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
+ | precision.                                                                |
+ |                                                                           |
+ | This function must be used carefully: most overflow of intermediate       |
+ | results is controlled, but overflow of the result is not.                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"polynomial_Xsig.S"
+
+#include "fpu_emu.h"
+
+
+#define	TERM_SIZE	$8
+#define	SUM_MS		-20(%ebp)	/* sum ms long */
+#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
+#define	SUM_LS		-28(%ebp)	/* sum ls long */
+#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
+#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
+#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
+#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
+
+.text
+ENTRY(polynomial_Xsig)
+	pushl	%ebp
+	movl	%esp,%ebp
+	subl	$32,%esp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM2,%esi		/* x */
+	movl	PARAM3,%edi		/* terms */
+
+	movl	TERM_SIZE,%eax
+	mull	PARAM4			/* n */
+	addl	%eax,%edi
+
+	movl	4(%edi),%edx		/* terms[n] */
+	movl	%edx,SUM_MS
+	movl	(%edi),%edx		/* terms[n] */
+	movl	%edx,SUM_MIDDLE
+	xor	%eax,%eax
+	movl	%eax,SUM_LS
+	movb	%al,OVERFLOWED
+
+	subl	TERM_SIZE,%edi
+	decl	PARAM4
+	js	L_accum_done
+
+L_accum_loop:
+	xor	%eax,%eax
+	movl	%eax,ACCUM_MS
+	movl	%eax,ACCUM_MIDDLE
+
+	movl	SUM_MIDDLE,%eax
+	mull	(%esi)			/* x ls long */
+	movl	%edx,ACCUM_LS
+
+	movl	SUM_MIDDLE,%eax
+	mull	4(%esi)			/* x ms long */
+	addl	%eax,ACCUM_LS
+	adcl	%edx,ACCUM_MIDDLE
+	adcl	$0,ACCUM_MS
+
+	movl	SUM_MS,%eax
+	mull	(%esi)			/* x ls long */
+	addl	%eax,ACCUM_LS
+	adcl	%edx,ACCUM_MIDDLE
+	adcl	$0,ACCUM_MS
+
+	movl	SUM_MS,%eax
+	mull	4(%esi)			/* x ms long */
+	addl	%eax,ACCUM_MIDDLE
+	adcl	%edx,ACCUM_MS
+
+	testb	$0xff,OVERFLOWED
+	jz	L_no_overflow
+
+	movl	(%esi),%eax
+	addl	%eax,ACCUM_MIDDLE
+	movl	4(%esi),%eax
+	adcl	%eax,ACCUM_MS		/* This could overflow too */
+
+L_no_overflow:
+
+/*
+ * Now put the sum of next term and the accumulator
+ * into the sum register
+ */
+	movl	ACCUM_LS,%eax
+	addl	(%edi),%eax		/* term ls long */
+	movl	%eax,SUM_LS
+	movl	ACCUM_MIDDLE,%eax
+	adcl	(%edi),%eax		/* term ls long */
+	movl	%eax,SUM_MIDDLE
+	movl	ACCUM_MS,%eax
+	adcl	4(%edi),%eax		/* term ms long */
+	movl	%eax,SUM_MS
+	sbbb	%al,%al
+	movb	%al,OVERFLOWED		/* Used in the next iteration */
+
+	subl	TERM_SIZE,%edi
+	decl	PARAM4
+	jns	L_accum_loop
+
+L_accum_done:
+	movl	PARAM1,%edi		/* accum */
+	movl	SUM_LS,%eax
+	addl	%eax,(%edi)
+	movl	SUM_MIDDLE,%eax
+	adcl	%eax,4(%edi)
+	movl	SUM_MS,%eax
+	adcl	%eax,8(%edi)
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c
new file mode 100644
index 00000000000..7cd3b37ac08
--- /dev/null
+++ b/arch/i386/math-emu/reg_add_sub.c
@@ -0,0 +1,374 @@
+/*---------------------------------------------------------------------------+
+ |  reg_add_sub.c                                                            |
+ |                                                                           |
+ | Functions to add or subtract two registers and put the result in a third. |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1997                                              |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  For each function, the destination may be any FPU_REG, including one of  |
+ | the source FPU_REGs.                                                      |
+ |  Each function returns 0 if the answer is o.k., otherwise a non-zero      |
+ | value is returned, indicating either an exception condition or an         |
+ | internal error.                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "fpu_system.h"
+
+static
+int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
+		     FPU_REG const *b, u_char tagb, u_char signb,
+		     FPU_REG *dest, int deststnr, int control_w);
+
+/*
+  Operates on st(0) and st(n), or on st(0) and temporary data.
+  The destination must be one of the source st(x).
+  */
+int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
+{
+  FPU_REG *a = &st(0);
+  FPU_REG *dest = &st(deststnr);
+  u_char signb = getsign(b);
+  u_char taga = FPU_gettag0();
+  u_char signa = getsign(a);
+  u_char saved_sign = getsign(dest);
+  int diff, tag, expa, expb;
+  
+  if ( !(taga | tagb) )
+    {
+      expa = exponent(a);
+      expb = exponent(b);
+
+    valid_add:
+      /* Both registers are valid */
+      if (!(signa ^ signb))
+	{
+	  /* signs are the same */
+	  tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
+	}
+      else
+	{
+	  /* The signs are different, so do a subtraction */
+	  diff = expa - expb;
+	  if (!diff)
+	    {
+	      diff = a->sigh - b->sigh;  /* This works only if the ms bits
+					    are identical. */
+	      if (!diff)
+		{
+		  diff = a->sigl > b->sigl;
+		  if (!diff)
+		    diff = -(a->sigl < b->sigl);
+		}
+	    }
+      
+	  if (diff > 0)
+	    {
+	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
+	    }
+	  else if ( diff < 0 )
+	    {
+	      tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
+	    }
+	  else
+	    {
+	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+	      /* sign depends upon rounding mode */
+	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+		      ? SIGN_POS : SIGN_NEG);
+	      return TAG_Zero;
+	    }
+	}
+
+      if ( tag < 0 )
+	{
+	  setsign(dest, saved_sign);
+	  return tag;
+	}
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+
+  if ( taga == TAG_Special )
+    taga = FPU_Special(a);
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+
+  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
+    {
+      FPU_REG x, y;
+
+      if ( denormal_operand() < 0 )
+	return FPU_Exception;
+
+      FPU_to_exp16(a, &x);
+      FPU_to_exp16(b, &y);
+      a = &x;
+      b = &y;
+      expa = exponent16(a);
+      expb = exponent16(b);
+      goto valid_add;
+    }
+
+  if ( (taga == TW_NaN) || (tagb == TW_NaN) )
+    {
+      if ( deststnr == 0 )
+	return real_2op_NaN(b, tagb, deststnr, a);
+      else
+	return real_2op_NaN(a, taga, deststnr, a);
+    }
+
+  return add_sub_specials(a, taga, signa, b, tagb, signb,
+			  dest, deststnr, control_w);
+}
+
+
+/* Subtract b from a.  (a-b) -> dest */
+int FPU_sub(int flags, int rm, int control_w)
+{
+  FPU_REG const *a, *b;
+  FPU_REG *dest;
+  u_char taga, tagb, signa, signb, saved_sign, sign;
+  int diff, tag = 0, expa, expb, deststnr;
+
+  a = &st(0);
+  taga = FPU_gettag0();
+
+  deststnr = 0;
+  if ( flags & LOADED )
+    {
+      b = (FPU_REG *)rm;
+      tagb = flags & 0x0f;
+    }
+  else
+    {
+      b = &st(rm);
+      tagb = FPU_gettagi(rm);
+
+      if ( flags & DEST_RM )
+	deststnr = rm;
+    }
+
+  signa = getsign(a);
+  signb = getsign(b);
+
+  if ( flags & REV )
+    {
+      signa ^= SIGN_NEG;
+      signb ^= SIGN_NEG;
+    }
+
+  dest = &st(deststnr);
+  saved_sign = getsign(dest);
+
+  if ( !(taga | tagb) )
+    {
+      expa = exponent(a);
+      expb = exponent(b);
+
+    valid_subtract:
+      /* Both registers are valid */
+
+      diff = expa - expb;
+
+      if (!diff)
+	{
+	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
+	  if (!diff)
+	    {
+	      diff = a->sigl > b->sigl;
+	      if (!diff)
+		diff = -(a->sigl < b->sigl);
+	    }
+	}
+
+      switch ( (((int)signa)*2 + signb) / SIGN_NEG )
+	{
+	case 0: /* P - P */
+	case 3: /* N - N */
+	  if (diff > 0)
+	    {
+	      /* |a| > |b| */
+	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
+	    }
+	  else if ( diff == 0 )
+	    {
+	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+
+	      /* sign depends upon rounding mode */
+	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+		? SIGN_POS : SIGN_NEG);
+	      return TAG_Zero;
+	    }
+	  else
+	    {
+	      sign = signa ^ SIGN_NEG;
+	      tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
+	    }
+	  break;
+	case 1: /* P - N */
+	  tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
+	  break;
+	case 2: /* N - P */
+	  tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
+	  break;
+#ifdef PARANOID
+	default:
+	  EXCEPTION(EX_INTERNAL|0x111);
+	  return -1;
+#endif
+	}
+      if ( tag < 0 )
+	{
+	  setsign(dest, saved_sign);
+	  return tag;
+	}
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+
+  if ( taga == TAG_Special )
+    taga = FPU_Special(a);
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+
+  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
+    {
+      FPU_REG x, y;
+
+      if ( denormal_operand() < 0 )
+	return FPU_Exception;
+
+      FPU_to_exp16(a, &x);
+      FPU_to_exp16(b, &y);
+      a = &x;
+      b = &y;
+      expa = exponent16(a);
+      expb = exponent16(b);
+
+      goto valid_subtract;
+    }
+
+  if ( (taga == TW_NaN) || (tagb == TW_NaN) )
+    {
+      FPU_REG const *d1, *d2;
+      if ( flags & REV )
+	{
+	  d1 = b;
+	  d2 = a;
+	}
+      else
+	{
+	  d1 = a;
+	  d2 = b;
+	}
+      if ( flags & LOADED )
+	return real_2op_NaN(b, tagb, deststnr, d1);
+      if ( flags & DEST_RM )
+	return real_2op_NaN(a, taga, deststnr, d2);
+      else
+	return real_2op_NaN(b, tagb, deststnr, d2);
+    }
+
+    return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
+			    dest, deststnr, control_w);
+}
+
+
+static
+int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
+		     FPU_REG const *b, u_char tagb, u_char signb,
+		     FPU_REG *dest, int deststnr, int control_w)
+{
+  if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
+       && (denormal_operand() < 0) )
+    return FPU_Exception;
+
+  if (taga == TAG_Zero)
+    {
+      if (tagb == TAG_Zero)
+	{
+	  /* Both are zero, result will be zero. */
+	  u_char different_signs = signa ^ signb;
+
+	  FPU_copy_to_regi(a, TAG_Zero, deststnr);
+	  if ( different_signs )
+	    {
+	      /* Signs are different. */
+	      /* Sign of answer depends upon rounding mode. */
+	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+		      ? SIGN_POS : SIGN_NEG);
+	    }
+	  else
+	    setsign(dest, signa);  /* signa may differ from the sign of a. */
+	  return TAG_Zero;
+	}
+      else
+	{
+	  reg_copy(b, dest);
+	  if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
+	    {
+	      /* A pseudoDenormal, convert it. */
+	      addexponent(dest, 1);
+	      tagb = TAG_Valid;
+	    }
+	  else if ( tagb > TAG_Empty )
+	    tagb = TAG_Special;
+	  setsign(dest, signb);  /* signb may differ from the sign of b. */
+	  FPU_settagi(deststnr, tagb);
+	  return tagb;
+	}
+    }
+  else if (tagb == TAG_Zero)
+    {
+      reg_copy(a, dest);
+      if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
+	{
+	  /* A pseudoDenormal */
+	  addexponent(dest, 1);
+	  taga = TAG_Valid;
+	}
+      else if ( taga > TAG_Empty )
+	taga = TAG_Special;
+      setsign(dest, signa);  /* signa may differ from the sign of a. */
+      FPU_settagi(deststnr, taga);
+      return taga;
+    }
+  else if (taga == TW_Infinity)
+    {
+      if ( (tagb != TW_Infinity) || (signa == signb) )
+	{
+	  FPU_copy_to_regi(a, TAG_Special, deststnr);
+	  setsign(dest, signa);  /* signa may differ from the sign of a. */
+	  return taga;
+	}
+      /* Infinity-Infinity is undefined. */
+      return arith_invalid(deststnr);
+    }
+  else if (tagb == TW_Infinity)
+    {
+      FPU_copy_to_regi(b, TAG_Special, deststnr);
+      setsign(dest, signb);  /* signb may differ from the sign of b. */
+      return tagb;
+    }
+
+#ifdef PARANOID
+  EXCEPTION(EX_INTERNAL|0x101);
+#endif
+
+  return FPU_Exception;
+}
+
diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c
new file mode 100644
index 00000000000..f37c5b5a35a
--- /dev/null
+++ b/arch/i386/math-emu/reg_compare.c
@@ -0,0 +1,381 @@
+/*---------------------------------------------------------------------------+
+ |  reg_compare.c                                                            |
+ |                                                                           |
+ | Compare two floating point registers                                      |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | compare() is the core FPU_REG comparison function                         |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+static int compare(FPU_REG const *b, int tagb)
+{
+  int diff, exp0, expb;
+  u_char	  	st0_tag;
+  FPU_REG  	*st0_ptr;
+  FPU_REG	x, y;
+  u_char		st0_sign, signb = getsign(b);
+
+  st0_ptr = &st(0);
+  st0_tag = FPU_gettag0();
+  st0_sign = getsign(st0_ptr);
+
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+  if ( st0_tag == TAG_Special )
+    st0_tag = FPU_Special(st0_ptr);
+
+  if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
+       || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
+    {
+      if ( st0_tag == TAG_Zero )
+	{
+	  if ( tagb == TAG_Zero ) return COMP_A_eq_B;
+	  if ( tagb == TAG_Valid )
+	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+	  if ( tagb == TW_Denormal )
+	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+	    | COMP_Denormal;
+	}
+      else if ( tagb == TAG_Zero )
+	{
+	  if ( st0_tag == TAG_Valid )
+	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+	  if ( st0_tag == TW_Denormal )
+	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+	    | COMP_Denormal;
+	}
+
+      if ( st0_tag == TW_Infinity )
+	{
+	  if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
+	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+	  else if ( tagb == TW_Denormal )
+	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+	      | COMP_Denormal;
+	  else if ( tagb == TW_Infinity )
+	    {
+	      /* The 80486 book says that infinities can be equal! */
+	      return (st0_sign == signb) ? COMP_A_eq_B :
+		((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+	    }
+	  /* Fall through to the NaN code */
+	}
+      else if ( tagb == TW_Infinity )
+	{
+	  if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
+	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+	  if ( st0_tag == TW_Denormal )
+	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+		| COMP_Denormal;
+	  /* Fall through to the NaN code */
+	}
+
+      /* The only possibility now should be that one of the arguments
+	 is a NaN */
+      if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
+	{
+	  int signalling = 0, unsupported = 0;
+	  if ( st0_tag == TW_NaN )
+	    {
+	      signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
+	      unsupported = !((exponent(st0_ptr) == EXP_OVER)
+			      && (st0_ptr->sigh & 0x80000000));
+	    }
+	  if ( tagb == TW_NaN )
+	    {
+	      signalling |= (b->sigh & 0xc0000000) == 0x80000000;
+	      unsupported |= !((exponent(b) == EXP_OVER)
+			       && (b->sigh & 0x80000000));
+	    }
+	  if ( signalling || unsupported )
+	    return COMP_No_Comp | COMP_SNaN | COMP_NaN;
+	  else
+	    /* Neither is a signaling NaN */
+	    return COMP_No_Comp | COMP_NaN;
+	}
+      
+      EXCEPTION(EX_Invalid);
+    }
+  
+  if (st0_sign != signb)
+    {
+      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+	    COMP_Denormal : 0);
+    }
+
+  if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
+    {
+      FPU_to_exp16(st0_ptr, &x);
+      FPU_to_exp16(b, &y);
+      st0_ptr = &x;
+      b = &y;
+      exp0 = exponent16(st0_ptr);
+      expb = exponent16(b);
+    }
+  else
+    {
+      exp0 = exponent(st0_ptr);
+      expb = exponent(b);
+    }
+
+#ifdef PARANOID
+  if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+  if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+#endif /* PARANOID */
+
+  diff = exp0 - expb;
+  if ( diff == 0 )
+    {
+      diff = st0_ptr->sigh - b->sigh;  /* Works only if ms bits are
+					      identical */
+      if ( diff == 0 )
+	{
+	diff = st0_ptr->sigl > b->sigl;
+	if ( diff == 0 )
+	  diff = -(st0_ptr->sigl < b->sigl);
+	}
+    }
+
+  if ( diff > 0 )
+    {
+      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+	    COMP_Denormal : 0);
+    }
+  if ( diff < 0 )
+    {
+      return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+	    COMP_Denormal : 0);
+    }
+
+  return COMP_A_eq_B
+    | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+	COMP_Denormal : 0);
+
+}
+
+
+/* This function requires that st(0) is not empty */
+int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
+{
+  int f = 0, c;
+
+  c = compare(loaded_data, loaded_tag);
+
+  if (c & COMP_NaN)
+    {
+      EXCEPTION(EX_Invalid);
+      f = SW_C3 | SW_C2 | SW_C0;
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#ifdef PARANOID
+      default:
+	EXCEPTION(EX_INTERNAL|0x121);
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#endif /* PARANOID */
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand() < 0;
+    }
+  return 0;
+}
+
+
+static int compare_st_st(int nr)
+{
+  int f = 0, c;
+  FPU_REG *st_ptr;
+
+  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      /* Stack fault */
+      EXCEPTION(EX_StackUnder);
+      return !(control_word & CW_Invalid);
+    }
+
+  st_ptr = &st(nr);
+  c = compare(st_ptr, FPU_gettagi(nr));
+  if (c & COMP_NaN)
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      EXCEPTION(EX_Invalid);
+      return !(control_word & CW_Invalid);
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#ifdef PARANOID
+      default:
+	EXCEPTION(EX_INTERNAL|0x122);
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#endif /* PARANOID */
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand() < 0;
+    }
+  return 0;
+}
+
+
+static int compare_u_st_st(int nr)
+{
+  int f = 0, c;
+  FPU_REG *st_ptr;
+
+  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      /* Stack fault */
+      EXCEPTION(EX_StackUnder);
+      return !(control_word & CW_Invalid);
+    }
+
+  st_ptr = &st(nr);
+  c = compare(st_ptr, FPU_gettagi(nr));
+  if (c & COMP_NaN)
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      if (c & COMP_SNaN)       /* This is the only difference between
+				  un-ordered and ordinary comparisons */
+	{
+	  EXCEPTION(EX_Invalid);
+	  return !(control_word & CW_Invalid);
+	}
+      return 0;
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#ifdef PARANOID
+      default:
+	EXCEPTION(EX_INTERNAL|0x123);
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+#endif /* PARANOID */ 
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand() < 0;
+    }
+  return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+void fcom_st(void)
+{
+  /* fcom st(i) */
+  compare_st_st(FPU_rm);
+}
+
+
+void fcompst(void)
+{
+  /* fcomp st(i) */
+  if ( !compare_st_st(FPU_rm) )
+    FPU_pop();
+}
+
+
+void fcompp(void)
+{
+  /* fcompp */
+  if (FPU_rm != 1)
+    {
+      FPU_illegal();
+      return;
+    }
+  if ( !compare_st_st(1) )
+      poppop();
+}
+
+
+void fucom_(void)
+{
+  /* fucom st(i) */
+  compare_u_st_st(FPU_rm);
+
+}
+
+
+void fucomp(void)
+{
+  /* fucomp st(i) */
+  if ( !compare_u_st_st(FPU_rm) )
+    FPU_pop();
+}
+
+
+void fucompp(void)
+{
+  /* fucompp */
+  if (FPU_rm == 1)
+    {
+      if ( !compare_u_st_st(1) )
+	poppop();
+    }
+  else
+    FPU_illegal();
+}
diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c
new file mode 100644
index 00000000000..a8501580196
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.c
@@ -0,0 +1,120 @@
+/*---------------------------------------------------------------------------+
+ |  reg_constant.c                                                           |
+ |                                                                           |
+ | All of the constant FPU_REGs                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1997                                         |
+ |                     W. Metzenthen, 22 Parker St, Ormond, Vic 3163,        |
+ |                     Australia.  E-mail   billm@suburbia.net               |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "reg_constant.h"
+#include "control_w.h"
+
+
+#define MAKE_REG(s,e,l,h) { l, h, \
+                            ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+
+FPU_REG const CONST_1    = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
+#if 0
+FPU_REG const CONST_2    = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
+FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
+#endif  /*  0  */
+static FPU_REG const CONST_L2T  = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
+static FPU_REG const CONST_L2E  = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
+FPU_REG const CONST_PI   = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI2  = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI4  = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
+static FPU_REG const CONST_LG2  = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
+static FPU_REG const CONST_LN2  = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
+
+/* Extra bits to take pi/2 to more than 128 bits precision. */
+FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
+					 0xfc8f8cbb, 0xece675d1);
+
+/* Only the sign (and tag) is used in internal zeroes */
+FPU_REG const CONST_Z    = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
+
+/* Only the sign and significand (and tag) are used in internal NaNs */
+/* The 80486 never generates one of these 
+FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
+ */
+/* This is the real indefinite QNaN */
+FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
+
+/* Only the sign (and tag) is used in internal infinities */
+FPU_REG const CONST_INF  = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
+
+
+static void fld_const(FPU_REG const *c, int adj, u_char tag)
+{
+  FPU_REG *st_new_ptr;
+
+  if ( STACK_OVERFLOW )
+    {
+      FPU_stack_overflow();
+      return;
+    }
+  push();
+  reg_copy(c, st_new_ptr);
+  st_new_ptr->sigl += adj;  /* For all our fldxxx constants, we don't need to
+			       borrow or carry. */
+  FPU_settag0(tag);
+  clear_C1();
+}
+
+/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
+   (and not one of RC_RND or RC_UP).
+   */
+#define DOWN_OR_CHOP(x)  (x & RC_DOWN)
+
+static void fld1(int rc)
+{
+  fld_const(&CONST_1, 0, TAG_Valid);
+}
+
+static void fldl2t(int rc)
+{
+  fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
+}
+
+static void fldl2e(int rc)
+{
+  fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+}
+
+static void fldpi(int rc)
+{
+  fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+}
+
+static void fldlg2(int rc)
+{
+  fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+}
+
+static void fldln2(int rc)
+{
+  fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+}
+
+static void fldz(int rc)
+{
+  fld_const(&CONST_Z, 0, TAG_Zero);
+}
+
+typedef void (*FUNC_RC)(int);
+
+static FUNC_RC constants_table[] = {
+  fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
+};
+
+void fconst(void)
+{
+  (constants_table[FPU_rm])(control_word & CW_RC);
+}
diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h
new file mode 100644
index 00000000000..1bffaec3a13
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.h
@@ -0,0 +1,25 @@
+/*---------------------------------------------------------------------------+
+ |  reg_constant.h                                                           |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _REG_CONSTANT_H_
+#define _REG_CONSTANT_H_
+
+#include "fpu_emu.h"
+
+extern FPU_REG const CONST_1;
+extern FPU_REG const CONST_PI;
+extern FPU_REG const CONST_PI2;
+extern FPU_REG const CONST_PI2extra;
+extern FPU_REG const CONST_PI4;
+extern FPU_REG const CONST_Z;
+extern FPU_REG const CONST_PINF;
+extern FPU_REG const CONST_INF;
+extern FPU_REG const CONST_MINF;
+extern FPU_REG const CONST_QNaN;
+
+#endif /* _REG_CONSTANT_H_ */
diff --git a/arch/i386/math-emu/reg_convert.c b/arch/i386/math-emu/reg_convert.c
new file mode 100644
index 00000000000..45a25875270
--- /dev/null
+++ b/arch/i386/math-emu/reg_convert.c
@@ -0,0 +1,53 @@
+/*---------------------------------------------------------------------------+
+ |  reg_convert.c                                                            |
+ |                                                                           |
+ |  Convert register representation.                                         |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1996,1997                                    |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_emu.h"
+
+
+int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
+{
+  int sign = getsign(a);
+
+  *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
+
+  /* Set up the exponent as a 16 bit quantity. */
+  setexponent16(x, exponent(a));
+
+  if ( exponent16(x) == EXP_UNDER )
+    {
+      /* The number is a de-normal or pseudodenormal. */
+      /* We only deal with the significand and exponent. */
+
+      if (x->sigh & 0x80000000)
+	{
+	  /* Is a pseudodenormal. */
+	  /* This is non-80486 behaviour because the number
+	     loses its 'denormal' identity. */
+	  addexponent(x, 1);
+	}
+      else
+	{
+	  /* Is a denormal. */
+	  addexponent(x, 1);
+	  FPU_normalize_nuo(x);
+	}
+    }
+
+  if ( !(x->sigh & 0x80000000) )
+    {
+      EXCEPTION(EX_INTERNAL | 0x180);
+    }
+
+  return sign;
+}
+
diff --git a/arch/i386/math-emu/reg_divide.c b/arch/i386/math-emu/reg_divide.c
new file mode 100644
index 00000000000..5cee7ff920d
--- /dev/null
+++ b/arch/i386/math-emu/reg_divide.c
@@ -0,0 +1,207 @@
+/*---------------------------------------------------------------------------+
+ |  reg_divide.c                                                             |
+ |                                                                           |
+ | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
+ |                                                                           |
+ | Copyright (C) 1996                                                        |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@jacobi.maths.monash.edu.au                |
+ |                                                                           |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | The destination may be any FPU_REG, including one of the source FPU_REGs. |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+
+/*
+  Divide one register by another and put the result into a third register.
+  */
+int FPU_div(int flags, int rm, int control_w)
+{
+  FPU_REG x, y;
+  FPU_REG const *a, *b, *st0_ptr, *st_ptr;
+  FPU_REG *dest;
+  u_char taga, tagb, signa, signb, sign, saved_sign;
+  int tag, deststnr;
+
+  if ( flags & DEST_RM )
+    deststnr = rm;
+  else
+    deststnr = 0;
+
+  if ( flags & REV )
+    {
+      b = &st(0);
+      st0_ptr = b;
+      tagb = FPU_gettag0();
+      if ( flags & LOADED )
+	{
+	  a = (FPU_REG *)rm;
+	  taga = flags & 0x0f;
+	}
+      else
+	{
+	  a = &st(rm);
+	  st_ptr = a;
+	  taga = FPU_gettagi(rm);
+	}
+    }
+  else
+    {
+      a = &st(0);
+      st0_ptr = a;
+      taga = FPU_gettag0();
+      if ( flags & LOADED )
+	{
+	  b = (FPU_REG *)rm;
+	  tagb = flags & 0x0f;
+	}
+      else
+	{
+	  b = &st(rm);
+	  st_ptr = b;
+	  tagb = FPU_gettagi(rm);
+	}
+    }
+
+  signa = getsign(a);
+  signb = getsign(b);
+
+  sign = signa ^ signb;
+
+  dest = &st(deststnr);
+  saved_sign = getsign(dest);
+
+  if ( !(taga | tagb) )
+    {
+      /* Both regs Valid, this should be the most common case. */
+      reg_copy(a, &x);
+      reg_copy(b, &y);
+      setpositive(&x);
+      setpositive(&y);
+      tag = FPU_u_div(&x, &y, dest, control_w, sign);
+
+      if ( tag < 0 )
+	return tag;
+
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+
+  if ( taga == TAG_Special )
+    taga = FPU_Special(a);
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+
+  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
+    {
+      if ( denormal_operand() < 0 )
+	return FPU_Exception;
+
+      FPU_to_exp16(a, &x);
+      FPU_to_exp16(b, &y);
+      tag = FPU_u_div(&x, &y, dest, control_w, sign);
+      if ( tag < 0 )
+	return tag;
+
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
+    {
+      if ( tagb != TAG_Zero )
+	{
+	  /* Want to find Zero/Valid */
+	  if ( tagb == TW_Denormal )
+	    {
+	      if ( denormal_operand() < 0 )
+		return FPU_Exception;
+	    }
+
+	  /* The result is zero. */
+	  FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+	  setsign(dest, sign);
+	  return TAG_Zero;
+	}
+      /* We have an exception condition, either 0/0 or Valid/Zero. */
+      if ( taga == TAG_Zero )
+	{
+	  /* 0/0 */
+	  return arith_invalid(deststnr);
+	}
+      /* Valid/Zero */
+      return FPU_divide_by_zero(deststnr, sign);
+    }
+  /* Must have infinities, NaNs, etc */
+  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
+    {
+      if ( flags & LOADED )
+	return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
+
+      if ( flags & DEST_RM )
+	{
+	  int tag;
+	  tag = FPU_gettag0();
+	  if ( tag == TAG_Special )
+	    tag = FPU_Special(st0_ptr);
+	  return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
+	}
+      else
+	{
+	  int tag;
+	  tag = FPU_gettagi(rm);
+	  if ( tag == TAG_Special )
+	    tag = FPU_Special(&st(rm));
+	  return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
+	}
+    }
+  else if (taga == TW_Infinity)
+    {
+      if (tagb == TW_Infinity)
+	{
+	  /* infinity/infinity */
+	  return arith_invalid(deststnr);
+	}
+      else
+	{
+	  /* tagb must be Valid or Zero */
+	  if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
+	    return FPU_Exception;
+	  
+	  /* Infinity divided by Zero or Valid does
+	     not raise and exception, but returns Infinity */
+	  FPU_copy_to_regi(a, TAG_Special, deststnr);
+	  setsign(dest, sign);
+	  return taga;
+	}
+    }
+  else if (tagb == TW_Infinity)
+    {
+      if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
+	return FPU_Exception;
+
+      /* The result is zero. */
+      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+      setsign(dest, sign);
+      return TAG_Zero;
+    }
+#ifdef PARANOID
+  else
+    {
+      EXCEPTION(EX_INTERNAL|0x102);
+      return FPU_Exception;
+    }
+#endif /* PARANOID */ 
+
+	return 0;
+}
diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c
new file mode 100644
index 00000000000..f06ed41d191
--- /dev/null
+++ b/arch/i386/math-emu/reg_ld_str.c
@@ -0,0 +1,1370 @@
+/*---------------------------------------------------------------------------+
+ |  reg_ld_str.c                                                             |
+ |                                                                           |
+ | All of the functions which transfer data between user memory and FPU_REGs.|
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1996,1997                                    |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+
+#include <asm/uaccess.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "reg_constant.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+#define DOUBLE_Emax 1023         /* largest valid exponent */
+#define DOUBLE_Ebias 1023
+#define DOUBLE_Emin (-1022)      /* smallest valid exponent */
+
+#define SINGLE_Emax 127          /* largest valid exponent */
+#define SINGLE_Ebias 127
+#define SINGLE_Emin (-126)       /* smallest valid exponent */
+
+
+static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
+{
+  u_char tag;
+
+  setexponent16(r, exp);
+
+  tag = FPU_normalize_nuo(r);
+  stdexp(r);
+  if ( sign )
+    setnegative(r);
+
+  return tag;
+}
+
+
+int FPU_tagof(FPU_REG *ptr)
+{
+  int exp;
+
+  exp = exponent16(ptr) & 0x7fff;
+  if ( exp == 0 )
+    {
+      if ( !(ptr->sigh | ptr->sigl) )
+	{
+	  return TAG_Zero;
+	}
+      /* The number is a de-normal or pseudodenormal. */
+      return TAG_Special;
+    }
+
+  if ( exp == 0x7fff )
+    {
+      /* Is an Infinity, a NaN, or an unsupported data type. */
+      return TAG_Special;
+    }
+
+  if ( !(ptr->sigh & 0x80000000) )
+    {
+      /* Unsupported data type. */
+      /* Valid numbers have the ms bit set to 1. */
+      /* Unnormal. */
+      return TAG_Special;
+    }
+
+  return TAG_Valid;
+}
+
+
+/* Get a long double from user memory */
+int FPU_load_extended(long double __user *s, int stnr)
+{
+  FPU_REG *sti_ptr = &st(stnr);
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, s, 10);
+  __copy_from_user(sti_ptr, s, 10);
+  RE_ENTRANT_CHECK_ON;
+
+  return FPU_tagof(sti_ptr);
+}
+
+
+/* Get a double from user memory */
+int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
+{
+  int exp, tag, negative;
+  unsigned m64, l64;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, dfloat, 8);
+  FPU_get_user(m64, 1 + (unsigned long __user *) dfloat);
+  FPU_get_user(l64, (unsigned long __user *) dfloat);
+  RE_ENTRANT_CHECK_ON;
+
+  negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+  exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
+  m64 &= 0xfffff;
+  if ( exp > DOUBLE_Emax + EXTENDED_Ebias )
+    {
+      /* Infinity or NaN */
+      if ((m64 == 0) && (l64 == 0))
+	{
+	  /* +- infinity */
+	  loaded_data->sigh = 0x80000000;
+	  loaded_data->sigl = 0x00000000;
+	  exp = EXP_Infinity + EXTENDED_Ebias;
+	  tag = TAG_Special;
+	}
+      else
+	{
+	  /* Must be a signaling or quiet NaN */
+	  exp = EXP_NaN + EXTENDED_Ebias;
+	  loaded_data->sigh = (m64 << 11) | 0x80000000;
+	  loaded_data->sigh |= l64 >> 21;
+	  loaded_data->sigl = l64 << 11;
+	  tag = TAG_Special;    /* The calling function must look for NaNs */
+	}
+    }
+  else if ( exp < DOUBLE_Emin + EXTENDED_Ebias )
+    {
+      /* Zero or de-normal */
+      if ((m64 == 0) && (l64 == 0))
+	{
+	  /* Zero */
+	  reg_copy(&CONST_Z, loaded_data);
+	  exp = 0;
+	  tag = TAG_Zero;
+	}
+      else
+	{
+	  /* De-normal */
+	  loaded_data->sigh = m64 << 11;
+	  loaded_data->sigh |= l64 >> 21;
+	  loaded_data->sigl = l64 << 11;
+
+	  return normalize_no_excep(loaded_data, DOUBLE_Emin, negative)
+	    | (denormal_operand() < 0 ? FPU_Exception : 0);
+	}
+    }
+  else
+    {
+      loaded_data->sigh = (m64 << 11) | 0x80000000;
+      loaded_data->sigh |= l64 >> 21;
+      loaded_data->sigl = l64 << 11;
+
+      tag = TAG_Valid;
+    }
+
+  setexponent16(loaded_data, exp | negative);
+
+  return tag;
+}
+
+
+/* Get a float from user memory */
+int FPU_load_single(float __user *single, FPU_REG *loaded_data)
+{
+  unsigned m32;
+  int exp, tag, negative;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, single, 4);
+  FPU_get_user(m32, (unsigned long __user *) single);
+  RE_ENTRANT_CHECK_ON;
+
+  negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+
+  if (!(m32 & 0x7fffffff))
+    {
+      /* Zero */
+      reg_copy(&CONST_Z, loaded_data);
+      addexponent(loaded_data, negative);
+      return TAG_Zero;
+    }
+  exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
+  m32 = (m32 & 0x7fffff) << 8;
+  if ( exp < SINGLE_Emin + EXTENDED_Ebias )
+    {
+      /* De-normals */
+      loaded_data->sigh = m32;
+      loaded_data->sigl = 0;
+
+      return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
+	| (denormal_operand() < 0 ? FPU_Exception : 0);
+    }
+  else if ( exp > SINGLE_Emax + EXTENDED_Ebias )
+    {
+    /* Infinity or NaN */
+      if ( m32 == 0 )
+	{
+	  /* +- infinity */
+	  loaded_data->sigh = 0x80000000;
+	  loaded_data->sigl = 0x00000000;
+	  exp = EXP_Infinity + EXTENDED_Ebias;
+	  tag = TAG_Special;
+	}
+      else
+	{
+	  /* Must be a signaling or quiet NaN */
+	  exp = EXP_NaN + EXTENDED_Ebias;
+	  loaded_data->sigh = m32 | 0x80000000;
+	  loaded_data->sigl = 0;
+	  tag = TAG_Special;  /* The calling function must look for NaNs */
+	}
+    }
+  else
+    {
+      loaded_data->sigh = m32 | 0x80000000;
+      loaded_data->sigl = 0;
+      tag = TAG_Valid;
+    }
+
+  setexponent16(loaded_data, exp | negative);  /* Set the sign. */
+
+  return tag;
+}
+
+
+/* Get a long long from user memory */
+int FPU_load_int64(long long __user *_s)
+{
+  long long s;
+  int sign;
+  FPU_REG *st0_ptr = &st(0);
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, _s, 8);
+  copy_from_user(&s,_s,8);
+  RE_ENTRANT_CHECK_ON;
+
+  if (s == 0)
+    {
+      reg_copy(&CONST_Z, st0_ptr);
+      return TAG_Zero;
+    }
+
+  if (s > 0)
+    sign = SIGN_Positive;
+  else
+  {
+    s = -s;
+    sign = SIGN_Negative;
+  }
+
+  significand(st0_ptr) = s;
+
+  return normalize_no_excep(st0_ptr, 63, sign);
+}
+
+
+/* Get a long from user memory */
+int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
+{
+  long s;
+  int negative;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, _s, 4);
+  FPU_get_user(s, _s);
+  RE_ENTRANT_CHECK_ON;
+
+  if (s == 0)
+    { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+
+  if (s > 0)
+    negative = SIGN_Positive;
+  else
+    {
+      s = -s;
+      negative = SIGN_Negative;
+    }
+
+  loaded_data->sigh = s;
+  loaded_data->sigl = 0;
+
+  return normalize_no_excep(loaded_data, 31, negative);
+}
+
+
+/* Get a short from user memory */
+int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
+{
+  int s, negative;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, _s, 2);
+  /* Cast as short to get the sign extended. */
+  FPU_get_user(s, _s);
+  RE_ENTRANT_CHECK_ON;
+
+  if (s == 0)
+    { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+
+  if (s > 0)
+    negative = SIGN_Positive;
+  else
+    {
+      s = -s;
+      negative = SIGN_Negative;
+    }
+
+  loaded_data->sigh = s << 16;
+  loaded_data->sigl = 0;
+
+  return normalize_no_excep(loaded_data, 15, negative);
+}
+
+
+/* Get a packed bcd array from user memory */
+int FPU_load_bcd(u_char __user *s)
+{
+  FPU_REG *st0_ptr = &st(0);
+  int pos;
+  u_char bcd;
+  long long l=0;
+  int sign;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ, s, 10);
+  RE_ENTRANT_CHECK_ON;
+  for ( pos = 8; pos >= 0; pos--)
+    {
+      l *= 10;
+      RE_ENTRANT_CHECK_OFF;
+      FPU_get_user(bcd, s+pos);
+      RE_ENTRANT_CHECK_ON;
+      l += bcd >> 4;
+      l *= 10;
+      l += bcd & 0x0f;
+    }
+ 
+  RE_ENTRANT_CHECK_OFF;
+  FPU_get_user(sign, s+9);
+  sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
+  RE_ENTRANT_CHECK_ON;
+
+  if ( l == 0 )
+    {
+      reg_copy(&CONST_Z, st0_ptr);
+      addexponent(st0_ptr, sign);   /* Set the sign. */
+      return TAG_Zero;
+    }
+  else
+    {
+      significand(st0_ptr) = l;
+      return normalize_no_excep(st0_ptr, 63, sign);
+    }
+}
+
+/*===========================================================================*/
+
+/* Put a long double into user memory */
+int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d)
+{
+  /*
+    The only exception raised by an attempt to store to an
+    extended format is the Invalid Stack exception, i.e.
+    attempting to store from an empty register.
+   */
+
+  if ( st0_tag != TAG_Empty )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE, d, 10);
+
+      FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d);
+      FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4));
+      FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8));
+      RE_ENTRANT_CHECK_ON;
+
+      return 1;
+    }
+
+  /* Empty register (stack underflow) */
+  EXCEPTION(EX_StackUnder);
+  if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      /* Put out the QNaN indefinite */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE,d,10);
+      FPU_put_user(0, (unsigned long __user *) d);
+      FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d);
+      FPU_put_user(0xffff, 4 + (short __user *) d);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    }
+  else
+    return 0;
+
+}
+
+
+/* Put a double into user memory */
+int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
+{
+  unsigned long l[2];
+  unsigned long increment = 0;	/* avoid gcc warnings */
+  int precision_loss;
+  int exp;
+  FPU_REG tmp;
+
+  if ( st0_tag == TAG_Valid )
+    {
+      reg_copy(st0_ptr, &tmp);
+      exp = exponent(&tmp);
+
+      if ( exp < DOUBLE_Emin )     /* It may be a denormal */
+	{
+	  addexponent(&tmp, -DOUBLE_Emin + 52);  /* largest exp to be 51 */
+
+	denormal_arg:
+
+	  if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
+	    {
+#ifdef PECULIAR_486
+	      /* Did it round to a non-denormal ? */
+	      /* This behaviour might be regarded as peculiar, it appears
+		 that the 80486 rounds to the dest precision, then
+		 converts to decide underflow. */
+	      if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
+		  (st0_ptr->sigl & 0x000007ff)) )
+#endif /* PECULIAR_486 */
+		{
+		  EXCEPTION(EX_Underflow);
+		  /* This is a special case: see sec 16.2.5.1 of
+		     the 80486 book */
+		  if ( !(control_word & CW_Underflow) )
+		    return 0;
+		}
+	      EXCEPTION(precision_loss);
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+	    }
+	  l[0] = tmp.sigl;
+	  l[1] = tmp.sigh;
+	}
+      else
+	{
+	  if ( tmp.sigl & 0x000007ff )
+	    {
+	      precision_loss = 1;
+	      switch (control_word & CW_RC)
+		{
+		case RC_RND:
+		  /* Rounding can get a little messy.. */
+		  increment = ((tmp.sigl & 0x7ff) > 0x400) |  /* nearest */
+		    ((tmp.sigl & 0xc00) == 0xc00);            /* odd -> even */
+		  break;
+		case RC_DOWN:   /* towards -infinity */
+		  increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff;
+		  break;
+		case RC_UP:     /* towards +infinity */
+		  increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0;
+		  break;
+		case RC_CHOP:
+		  increment = 0;
+		  break;
+		}
+	  
+	      /* Truncate the mantissa */
+	      tmp.sigl &= 0xfffff800;
+	  
+	      if ( increment )
+		{
+		  if ( tmp.sigl >= 0xfffff800 )
+		    {
+		      /* the sigl part overflows */
+		      if ( tmp.sigh == 0xffffffff )
+			{
+			  /* The sigh part overflows */
+			  tmp.sigh = 0x80000000;
+			  exp++;
+			  if (exp >= EXP_OVER)
+			    goto overflow;
+			}
+		      else
+			{
+			  tmp.sigh ++;
+			}
+		      tmp.sigl = 0x00000000;
+		    }
+		  else
+		    {
+		      /* We only need to increment sigl */
+		      tmp.sigl += 0x00000800;
+		    }
+		}
+	    }
+	  else
+	    precision_loss = 0;
+	  
+	  l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
+	  l[1] = ((tmp.sigh >> 11) & 0xfffff);
+
+	  if ( exp > DOUBLE_Emax )
+	    {
+	    overflow:
+	      EXCEPTION(EX_Overflow);
+	      if ( !(control_word & CW_Overflow) )
+		return 0;
+	      set_precision_flag_up();
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+
+	      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      /* Overflow to infinity */
+	      l[0] = 0x00000000;	/* Set to */
+	      l[1] = 0x7ff00000;	/* + INF */
+	    }
+	  else
+	    {
+	      if ( precision_loss )
+		{
+		  if ( increment )
+		    set_precision_flag_up();
+		  else
+		    set_precision_flag_down();
+		}
+	      /* Add the exponent */
+	      l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
+	    }
+	}
+    }
+  else if (st0_tag == TAG_Zero)
+    {
+      /* Number is zero */
+      l[0] = 0;
+      l[1] = 0;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if ( st0_tag == TW_Denormal )
+	{
+	  /* A denormal will always underflow. */
+#ifndef PECULIAR_486
+	  /* An 80486 is supposed to be able to generate
+	     a denormal exception here, but... */
+	  /* Underflow has priority. */
+	  if ( control_word & CW_Underflow )
+	    denormal_operand();
+#endif /* PECULIAR_486 */
+	  reg_copy(st0_ptr, &tmp);
+	  goto denormal_arg;
+	}
+      else if (st0_tag == TW_Infinity)
+	{
+	  l[0] = 0;
+	  l[1] = 0x7ff00000;
+	}
+      else if (st0_tag == TW_NaN)
+	{
+	  /* Is it really a NaN ? */
+	  if ( (exponent(st0_ptr) == EXP_OVER)
+	       && (st0_ptr->sigh & 0x80000000) )
+	    {
+	      /* See if we can get a valid NaN from the FPU_REG */
+	      l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
+	      l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
+	      if ( !(st0_ptr->sigh & 0x40000000) )
+		{
+		  /* It is a signalling NaN */
+		  EXCEPTION(EX_Invalid);
+		  if ( !(control_word & CW_Invalid) )
+		    return 0;
+		  l[1] |= (0x40000000 >> 11);
+		}
+	      l[1] |= 0x7ff00000;
+	    }
+	  else
+	    {
+	      /* It is an unsupported data type */
+	      EXCEPTION(EX_Invalid);
+	      if ( !(control_word & CW_Invalid) )
+		return 0;
+	      l[0] = 0;
+	      l[1] = 0xfff80000;
+	    }
+	}
+    }
+  else if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      if ( control_word & CW_Invalid )
+	{
+	  /* The masked response */
+	  /* Put out the QNaN indefinite */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_access_ok(VERIFY_WRITE,dfloat,8);
+	  FPU_put_user(0, (unsigned long __user *) dfloat);
+	  FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+  if ( getsign(st0_ptr) )
+    l[1] |= 0x80000000;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,dfloat,8);
+  FPU_put_user(l[0], (unsigned long __user *)dfloat);
+  FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/* Put a float into user memory */
+int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
+{
+  long templ = 0;
+  unsigned long increment = 0;     	/* avoid gcc warnings */
+  int precision_loss;
+  int exp;
+  FPU_REG tmp;
+
+  if ( st0_tag == TAG_Valid )
+    {
+
+      reg_copy(st0_ptr, &tmp);
+      exp = exponent(&tmp);
+
+      if ( exp < SINGLE_Emin )
+	{
+	  addexponent(&tmp, -SINGLE_Emin + 23);  /* largest exp to be 22 */
+
+	denormal_arg:
+
+	  if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
+	    {
+#ifdef PECULIAR_486
+	      /* Did it round to a non-denormal ? */
+	      /* This behaviour might be regarded as peculiar, it appears
+		 that the 80486 rounds to the dest precision, then
+		 converts to decide underflow. */
+	      if ( !((tmp.sigl == 0x00800000) &&
+		  ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
+#endif /* PECULIAR_486 */
+		{
+		  EXCEPTION(EX_Underflow);
+		  /* This is a special case: see sec 16.2.5.1 of
+		     the 80486 book */
+		  if ( !(control_word & CW_Underflow) )
+		    return 0;
+		}
+	      EXCEPTION(precision_loss);
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+	    }
+	  templ = tmp.sigl;
+      }
+      else
+	{
+	  if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
+	    {
+	      unsigned long sigh = tmp.sigh;
+	      unsigned long sigl = tmp.sigl;
+	      
+	      precision_loss = 1;
+	      switch (control_word & CW_RC)
+		{
+		case RC_RND:
+		  increment = ((sigh & 0xff) > 0x80)       /* more than half */
+		    || (((sigh & 0xff) == 0x80) && sigl)   /* more than half */
+		    || ((sigh & 0x180) == 0x180);        /* round to even */
+		  break;
+		case RC_DOWN:   /* towards -infinity */
+		  increment = signpositive(&tmp)
+		    ? 0 : (sigl | (sigh & 0xff));
+		  break;
+		case RC_UP:     /* towards +infinity */
+		  increment = signpositive(&tmp)
+		    ? (sigl | (sigh & 0xff)) : 0;
+		  break;
+		case RC_CHOP:
+		  increment = 0;
+		  break;
+		}
+	  
+	      /* Truncate part of the mantissa */
+	      tmp.sigl = 0;
+	  
+	      if (increment)
+		{
+		  if ( sigh >= 0xffffff00 )
+		    {
+		      /* The sigh part overflows */
+		      tmp.sigh = 0x80000000;
+		      exp++;
+		      if ( exp >= EXP_OVER )
+			goto overflow;
+		    }
+		  else
+		    {
+		      tmp.sigh &= 0xffffff00;
+		      tmp.sigh += 0x100;
+		    }
+		}
+	      else
+		{
+		  tmp.sigh &= 0xffffff00;  /* Finish the truncation */
+		}
+	    }
+	  else
+	    precision_loss = 0;
+      
+	  templ = (tmp.sigh >> 8) & 0x007fffff;
+
+	  if ( exp > SINGLE_Emax )
+	    {
+	    overflow:
+	      EXCEPTION(EX_Overflow);
+	      if ( !(control_word & CW_Overflow) )
+		return 0;
+	      set_precision_flag_up();
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+
+	      /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
+	      /* Masked response is overflow to infinity. */
+	      templ = 0x7f800000;
+	    }
+	  else
+	    {
+	      if ( precision_loss )
+		{
+		  if ( increment )
+		    set_precision_flag_up();
+		  else
+		    set_precision_flag_down();
+		}
+	      /* Add the exponent */
+	      templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
+	    }
+	}
+    }
+  else if (st0_tag == TAG_Zero)
+    {
+      templ = 0;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if (st0_tag == TW_Denormal)
+	{
+	  reg_copy(st0_ptr, &tmp);
+
+	  /* A denormal will always underflow. */
+#ifndef PECULIAR_486
+	  /* An 80486 is supposed to be able to generate
+	     a denormal exception here, but... */
+	  /* Underflow has priority. */
+	  if ( control_word & CW_Underflow )
+	    denormal_operand();
+#endif /* PECULIAR_486 */ 
+	  goto denormal_arg;
+	}
+      else if (st0_tag == TW_Infinity)
+	{
+	  templ = 0x7f800000;
+	}
+      else if (st0_tag == TW_NaN)
+	{
+	  /* Is it really a NaN ? */
+	  if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) )
+	    {
+	      /* See if we can get a valid NaN from the FPU_REG */
+	      templ = st0_ptr->sigh >> 8;
+	      if ( !(st0_ptr->sigh & 0x40000000) )
+		{
+		  /* It is a signalling NaN */
+		  EXCEPTION(EX_Invalid);
+		  if ( !(control_word & CW_Invalid) )
+		    return 0;
+		  templ |= (0x40000000 >> 8);
+		}
+	      templ |= 0x7f800000;
+	    }
+	  else
+	    {
+	      /* It is an unsupported data type */
+	      EXCEPTION(EX_Invalid);
+	      if ( !(control_word & CW_Invalid) )
+		return 0;
+	      templ = 0xffc00000;
+	    }
+	}
+#ifdef PARANOID
+      else
+	{
+	  EXCEPTION(EX_INTERNAL|0x164);
+	  return 0;
+	}
+#endif
+    }
+  else if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      if ( control_word & EX_Invalid )
+	{
+	  /* The masked response */
+	  /* Put out the QNaN indefinite */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_access_ok(VERIFY_WRITE,single,4);
+	  FPU_put_user(0xffc00000, (unsigned long __user *) single);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+#ifdef PARANOID
+  else
+    {
+      EXCEPTION(EX_INTERNAL|0x163);
+      return 0;
+    }
+#endif
+  if ( getsign(st0_ptr) )
+    templ |= 0x80000000;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,single,4);
+  FPU_put_user(templ,(unsigned long __user *) single);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/* Put a long long into user memory */
+int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
+{
+  FPU_REG t;
+  long long tll;
+  int precision_loss;
+
+  if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+	{
+	  EXCEPTION(EX_Invalid);
+	  goto invalid_operand;
+	}
+    }
+
+  reg_copy(st0_ptr, &t);
+  precision_loss = FPU_round_to_int(&t, st0_tag);
+  ((long *)&tll)[0] = t.sigl;
+  ((long *)&tll)[1] = t.sigh;
+  if ( (precision_loss == 1) ||
+      ((t.sigh & 0x80000000) &&
+       !((t.sigh == 0x80000000) && (t.sigl == 0) &&
+	 signnegative(&t))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      if ( control_word & EX_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  tll = 0x8000000000000000LL;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( signnegative(&t) )
+	tll = - tll;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,d,8);
+  copy_to_user(d, &tll, 8);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/* Put a long into user memory */
+int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
+{
+  FPU_REG t;
+  int precision_loss;
+
+  if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+	{
+	  EXCEPTION(EX_Invalid);
+	  goto invalid_operand;
+	}
+    }
+
+  reg_copy(st0_ptr, &t);
+  precision_loss = FPU_round_to_int(&t, st0_tag);
+  if (t.sigh ||
+      ((t.sigl & 0x80000000) &&
+       !((t.sigl == 0x80000000) && signnegative(&t))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      if ( control_word & EX_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  t.sigl = 0x80000000;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( signnegative(&t) )
+	t.sigl = -(long)t.sigl;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,d,4);
+  FPU_put_user(t.sigl, (unsigned long __user *) d);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/* Put a short into user memory */
+int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
+{
+  FPU_REG t;
+  int precision_loss;
+
+  if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+	{
+	  EXCEPTION(EX_Invalid);
+	  goto invalid_operand;
+	}
+    }
+
+  reg_copy(st0_ptr, &t);
+  precision_loss = FPU_round_to_int(&t, st0_tag);
+  if (t.sigh ||
+      ((t.sigl & 0xffff8000) &&
+       !((t.sigl == 0x8000) && signnegative(&t))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      if ( control_word & EX_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  t.sigl = 0x8000;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( signnegative(&t) )
+	t.sigl = -t.sigl;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,d,2);
+  FPU_put_user((short)t.sigl, d);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/* Put a packed bcd array into user memory */
+int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
+{
+  FPU_REG t;
+  unsigned long long ll;
+  u_char b;
+  int i, precision_loss;
+  u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
+
+  if ( st0_tag == TAG_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( st0_tag == TAG_Special )
+    {
+      st0_tag = FPU_Special(st0_ptr);
+      if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+	{
+	  EXCEPTION(EX_Invalid);
+	  goto invalid_operand;
+	}
+    }
+
+  reg_copy(st0_ptr, &t);
+  precision_loss = FPU_round_to_int(&t, st0_tag);
+  ll = significand(&t);
+
+  /* Check for overflow, by comparing with 999999999999999999 decimal. */
+  if ( (t.sigh > 0x0de0b6b3) ||
+      ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      if ( control_word & CW_Invalid )
+	{
+	  /* Produce the QNaN "indefinite" */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_access_ok(VERIFY_WRITE,d,10);
+	  for ( i = 0; i < 7; i++)
+	    FPU_put_user(0, d+i); /* These bytes "undefined" */
+	  FPU_put_user(0xc0, d+7); /* This byte "undefined" */
+	  FPU_put_user(0xff, d+8);
+	  FPU_put_user(0xff, d+9);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+  else if ( precision_loss )
+    {
+      /* Precision loss doesn't stop the data transfer */
+      set_precision_flag(precision_loss);
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,d,10);
+  RE_ENTRANT_CHECK_ON;
+  for ( i = 0; i < 9; i++)
+    {
+      b = FPU_div_small(&ll, 10);
+      b |= (FPU_div_small(&ll, 10)) << 4;
+      RE_ENTRANT_CHECK_OFF;
+      FPU_put_user(b, d+i);
+      RE_ENTRANT_CHECK_ON;
+    }
+  RE_ENTRANT_CHECK_OFF;
+  FPU_put_user(sign, d+9);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+/*===========================================================================*/
+
+/* r gets mangled such that sig is int, sign: 
+   it is NOT normalized */
+/* The return value (in eax) is zero if the result is exact,
+   if bits are changed due to rounding, truncation, etc, then
+   a non-zero value is returned */
+/* Overflow is signalled by a non-zero return value (in eax).
+   In the case of overflow, the returned significand always has the
+   largest possible value */
+int FPU_round_to_int(FPU_REG *r, u_char tag)
+{
+  u_char     very_big;
+  unsigned eax;
+
+  if (tag == TAG_Zero)
+    {
+      /* Make sure that zero is returned */
+      significand(r) = 0;
+      return 0;        /* o.k. */
+    }
+
+  if (exponent(r) > 63)
+    {
+      r->sigl = r->sigh = ~0;      /* The largest representable number */
+      return 1;        /* overflow */
+    }
+
+  eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
+  very_big = !(~(r->sigh) | ~(r->sigl));  /* test for 0xfff...fff */
+#define	half_or_more	(eax & 0x80000000)
+#define	frac_part	(eax)
+#define more_than_half  ((eax & 0x80000001) == 0x80000001)
+  switch (control_word & CW_RC)
+    {
+    case RC_RND:
+      if ( more_than_half               	/* nearest */
+	  || (half_or_more && (r->sigl & 1)) )	/* odd -> even */
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_DOWN:
+      if (frac_part && getsign(r))
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_UP:
+      if (frac_part && !getsign(r))
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_CHOP:
+      break;
+    }
+
+  return eax ? PRECISION_LOST_DOWN : 0;
+
+}
+
+/*===========================================================================*/
+
+u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
+{
+  unsigned short tag_word = 0;
+  u_char tag;
+  int i;
+
+  if ( (addr_modes.default_mode == VM86) ||
+      ((addr_modes.default_mode == PM16)
+      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_READ, s, 0x0e);
+      FPU_get_user(control_word, (unsigned short __user *) s);
+      FPU_get_user(partial_status, (unsigned short __user *) (s+2));
+      FPU_get_user(tag_word, (unsigned short __user *) (s+4));
+      FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6));
+      FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8));
+      FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a));
+      FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c));
+      RE_ENTRANT_CHECK_ON;
+      s += 0x0e;
+      if ( addr_modes.default_mode == VM86 )
+	{
+	  instruction_address.offset
+	    += (instruction_address.selector & 0xf000) << 4;
+	  operand_address.offset += (operand_address.selector & 0xf000) << 4;
+	}
+    }
+  else
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_READ, s, 0x1c);
+      FPU_get_user(control_word, (unsigned short __user *) s);
+      FPU_get_user(partial_status, (unsigned short __user *) (s+4));
+      FPU_get_user(tag_word, (unsigned short __user *) (s+8));
+      FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c));
+      FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10));
+      FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12));
+      FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14));
+      FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18));
+      RE_ENTRANT_CHECK_ON;
+      s += 0x1c;
+    }
+
+#ifdef PECULIAR_486
+  control_word &= ~0xe080;
+#endif /* PECULIAR_486 */ 
+
+  top = (partial_status >> SW_Top_Shift) & 7;
+
+  if ( partial_status & ~control_word & CW_Exceptions )
+    partial_status |= (SW_Summary | SW_Backward);
+  else
+    partial_status &= ~(SW_Summary | SW_Backward);
+
+  for ( i = 0; i < 8; i++ )
+    {
+      tag = tag_word & 3;
+      tag_word >>= 2;
+
+      if ( tag == TAG_Empty )
+	/* New tag is empty.  Accept it */
+	FPU_settag(i, TAG_Empty);
+      else if ( FPU_gettag(i) == TAG_Empty )
+	{
+	  /* Old tag is empty and new tag is not empty.  New tag is determined
+	     by old reg contents */
+	  if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias )
+	    {
+	      if ( !(fpu_register(i).sigl | fpu_register(i).sigh) )
+		FPU_settag(i, TAG_Zero);
+	      else
+		FPU_settag(i, TAG_Special);
+	    }
+	  else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias )
+	    {
+	      FPU_settag(i, TAG_Special);
+	    }
+	  else if ( fpu_register(i).sigh & 0x80000000 )
+	    FPU_settag(i, TAG_Valid);
+	  else
+	    FPU_settag(i, TAG_Special);   /* An Un-normal */
+  	}
+      /* Else old tag is not empty and new tag is not empty.  Old tag
+	 remains correct */
+    }
+
+  return s;
+}
+
+
+void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
+{
+  int i, regnr;
+  u_char __user *s = fldenv(addr_modes, data_address);
+  int offset = (top & 7) * 10, other = 80 - offset;
+
+  /* Copy all registers in stack order. */
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_READ,s,80);
+  __copy_from_user(register_base+offset, s, other);
+  if ( offset )
+    __copy_from_user(register_base, s+other, offset);
+  RE_ENTRANT_CHECK_ON;
+
+  for ( i = 0; i < 8; i++ )
+    {
+      regnr = (i+top) & 7;
+      if ( FPU_gettag(regnr) != TAG_Empty )
+	/* The loaded data over-rides all other cases. */
+	FPU_settag(regnr, FPU_tagof(&st(i)));
+    }
+
+}
+
+
+u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
+{
+  if ( (addr_modes.default_mode == VM86) ||
+      ((addr_modes.default_mode == PM16)
+      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE,d,14);
+#ifdef PECULIAR_486
+      FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d);
+#else
+      FPU_put_user(control_word, (unsigned short __user *) d);
+#endif /* PECULIAR_486 */
+      FPU_put_user(status_word(), (unsigned short __user *) (d+2));
+      FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4));
+      FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6));
+      FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a));
+      if ( addr_modes.default_mode == VM86 )
+	{
+	  FPU_put_user((instruction_address.offset & 0xf0000) >> 4,
+		      (unsigned short __user *) (d+8));
+	  FPU_put_user((operand_address.offset & 0xf0000) >> 4,
+		      (unsigned short __user *) (d+0x0c));
+	}
+      else
+	{
+	  FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8));
+	  FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c));
+	}
+      RE_ENTRANT_CHECK_ON;
+      d += 0x0e;
+    }
+  else
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_access_ok(VERIFY_WRITE, d, 7*4);
+#ifdef PECULIAR_486
+      control_word &= ~0xe080;
+      /* An 80486 sets nearly all of the reserved bits to 1. */
+      control_word |= 0xffff0040;
+      partial_status = status_word() | 0xffff0000;
+      fpu_tag_word |= 0xffff0000;
+      I387.soft.fcs &= ~0xf8000000;
+      I387.soft.fos |= 0xffff0000;
+#endif /* PECULIAR_486 */
+      __copy_to_user(d, &control_word, 7*4);
+      RE_ENTRANT_CHECK_ON;
+      d += 0x1c;
+    }
+  
+  control_word |= CW_Exceptions;
+  partial_status &= ~(SW_Summary | SW_Backward);
+
+  return d;
+}
+
+
+void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
+{
+  u_char __user *d;
+  int offset = (top & 7) * 10, other = 80 - offset;
+
+  d = fstenv(addr_modes, data_address);
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_access_ok(VERIFY_WRITE,d,80);
+
+  /* Copy all registers in stack order. */
+  __copy_to_user(d, register_base+offset, other);
+  if ( offset )
+    __copy_to_user(d+other, register_base, offset);
+  RE_ENTRANT_CHECK_ON;
+
+  finit();
+}
+
+/*===========================================================================*/
diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c
new file mode 100644
index 00000000000..40f50b61bc6
--- /dev/null
+++ b/arch/i386/math-emu/reg_mul.c
@@ -0,0 +1,132 @@
+/*---------------------------------------------------------------------------+
+ |  reg_mul.c                                                                |
+ |                                                                           |
+ | Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1997                                              |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ | Returns the tag of the result if no exceptions or errors occurred.        |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | The destination may be any FPU_REG, including one of the source FPU_REGs. |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_system.h"
+
+
+/*
+  Multiply two registers to give a register result.
+  The sources are st(deststnr) and (b,tagb,signb).
+  The destination is st(deststnr).
+  */
+/* This routine must be called with non-empty source registers */
+int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
+{
+  FPU_REG *a = &st(deststnr);
+  FPU_REG *dest = a;
+  u_char taga = FPU_gettagi(deststnr);
+  u_char saved_sign = getsign(dest);
+  u_char sign = (getsign(a) ^ getsign(b));
+  int tag;
+
+
+  if ( !(taga | tagb) )
+    {
+      /* Both regs Valid, this should be the most common case. */
+
+      tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
+      if ( tag < 0 )
+	{
+	  setsign(dest, saved_sign);
+	  return tag;
+	}
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+
+  if ( taga == TAG_Special )
+    taga = FPU_Special(a);
+  if ( tagb == TAG_Special )
+    tagb = FPU_Special(b);
+
+  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
+    {
+      FPU_REG x, y;
+      if ( denormal_operand() < 0 )
+	return FPU_Exception;
+
+      FPU_to_exp16(a, &x);
+      FPU_to_exp16(b, &y);
+      tag = FPU_u_mul(&x, &y, dest, control_w, sign,
+		      exponent16(&x) + exponent16(&y));
+      if ( tag < 0 )
+	{
+	  setsign(dest, saved_sign);
+	  return tag;
+	}
+      FPU_settagi(deststnr, tag);
+      return tag;
+    }
+  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
+    {
+      if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
+	   && (denormal_operand() < 0) )
+	return FPU_Exception;
+
+      /* Must have either both arguments == zero, or
+	 one valid and the other zero.
+	 The result is therefore zero. */
+      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+      /* The 80486 book says that the answer is +0, but a real
+	 80486 behaves this way.
+	 IEEE-754 apparently says it should be this way. */
+      setsign(dest, sign);
+      return TAG_Zero;
+    }
+      /* Must have infinities, NaNs, etc */
+  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
+    {
+      return real_2op_NaN(b, tagb, deststnr, &st(0));
+    }
+  else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
+	    || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
+    {
+      return arith_invalid(deststnr);  /* Zero*Infinity is invalid */
+    }
+  else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
+	    && (denormal_operand() < 0) )
+    {
+      return FPU_Exception;
+    }
+  else if (taga == TW_Infinity)
+    {
+      FPU_copy_to_regi(a, TAG_Special, deststnr);
+      setsign(dest, sign);
+      return TAG_Special;
+    }
+  else if (tagb == TW_Infinity)
+    {
+      FPU_copy_to_regi(b, TAG_Special, deststnr);
+      setsign(dest, sign);
+      return TAG_Special;
+    }
+
+#ifdef PARANOID
+  else
+    {
+      EXCEPTION(EX_INTERNAL|0x102);
+      return FPU_Exception;
+    }
+#endif /* PARANOID */ 
+
+	return 0;
+}
diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S
new file mode 100644
index 00000000000..8b6352efcee
--- /dev/null
+++ b/arch/i386/math-emu/reg_norm.S
@@ -0,0 +1,147 @@
+/*---------------------------------------------------------------------------+
+ |  reg_norm.S                                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1995,1997                                    |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ | Normalize the value in a FPU_REG.                                         |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |    int FPU_normalize(FPU_REG *n)                                          |
+ |                                                                           |
+ |    int FPU_normalize_nuo(FPU_REG *n)                                      |
+ |                                                                           |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+
+
+.text
+ENTRY(FPU_normalize)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx
+
+	movl	PARAM1,%ebx
+
+	movl	SIGH(%ebx),%edx
+	movl	SIGL(%ebx),%eax
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_done		/* Already normalized */
+	jnz	L_shift_1	/* Shift left 1 - 31 bits */
+
+	orl	%eax,%eax
+	jz	L_zero		/* The contents are zero */
+
+	movl	%eax,%edx
+	xorl	%eax,%eax
+	subw	$32,EXP(%ebx)	/* This can cause an underflow */
+
+/* We need to shift left by 1 - 31 bits */
+L_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx
+	shld	%cl,%eax,%edx
+	shl	%cl,%eax
+	subw	%cx,EXP(%ebx)	/* This can cause an underflow */
+
+	movl	%edx,SIGH(%ebx)
+	movl	%eax,SIGL(%ebx)
+
+L_done:
+	cmpw	EXP_OVER,EXP(%ebx)
+	jge	L_overflow
+
+	cmpw	EXP_UNDER,EXP(%ebx)
+	jle	L_underflow
+
+L_exit_valid:
+	movl	TAG_Valid,%eax
+
+	/* Convert the exponent to 80x87 form. */
+	addw	EXTENDED_Ebias,EXP(%ebx)
+	andw	$0x7fff,EXP(%ebx)
+
+L_exit:
+	popl	%ebx
+	leave
+	ret
+
+
+L_zero:
+	movw	$0,EXP(%ebx)
+	movl	TAG_Zero,%eax
+	jmp	L_exit
+
+L_underflow:
+	/* Convert the exponent to 80x87 form. */
+	addw	EXTENDED_Ebias,EXP(%ebx)
+	push	%ebx
+	call	arith_underflow
+	pop	%ebx
+	jmp	L_exit
+
+L_overflow:
+	/* Convert the exponent to 80x87 form. */
+	addw	EXTENDED_Ebias,EXP(%ebx)
+	push	%ebx
+	call	arith_overflow
+	pop	%ebx
+	jmp	L_exit
+
+
+
+/* Normalise without reporting underflow or overflow */
+ENTRY(FPU_normalize_nuo)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx
+
+	movl	PARAM1,%ebx
+
+	movl	SIGH(%ebx),%edx
+	movl	SIGL(%ebx),%eax
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_exit_nuo_valid	/* Already normalized */
+	jnz	L_nuo_shift_1	/* Shift left 1 - 31 bits */
+
+	orl	%eax,%eax
+	jz	L_exit_nuo_zero		/* The contents are zero */
+
+	movl	%eax,%edx
+	xorl	%eax,%eax
+	subw	$32,EXP(%ebx)	/* This can cause an underflow */
+
+/* We need to shift left by 1 - 31 bits */
+L_nuo_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx
+	shld	%cl,%eax,%edx
+	shl	%cl,%eax
+	subw	%cx,EXP(%ebx)	/* This can cause an underflow */
+
+	movl	%edx,SIGH(%ebx)
+	movl	%eax,SIGL(%ebx)
+
+L_exit_nuo_valid:
+	movl	TAG_Valid,%eax
+
+	popl	%ebx
+	leave
+	ret
+
+L_exit_nuo_zero:
+	movl	TAG_Zero,%eax
+	movw	EXP_UNDER,EXP(%ebx)
+
+	popl	%ebx
+	leave
+	ret
diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S
new file mode 100644
index 00000000000..d1d4e48b4f6
--- /dev/null
+++ b/arch/i386/math-emu/reg_round.S
@@ -0,0 +1,708 @@
+	.file "reg_round.S"
+/*---------------------------------------------------------------------------+
+ |  reg_round.S                                                              |
+ |                                                                           |
+ | Rounding/truncation/etc for FPU basic arithmetic functions.               |
+ |                                                                           |
+ | Copyright (C) 1993,1995,1997                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ | This code has four possible entry points.                                 |
+ | The following must be entered by a jmp instruction:                       |
+ |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
+ |                                                                           |
+ | The FPU_round entry point is intended to be used by C code.               |
+ | From C, call as:                                                          |
+ |  int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
+ |                                                                           |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ | For correct "up" and "down" rounding, the argument must have the correct  |
+ | sign.                                                                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Four entry points.                                                        |
+ |                                                                           |
+ | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
+ |  %eax:%ebx  64 bit significand                                            |
+ |  %edx       32 bit extension of the significand                           |
+ |  %edi       pointer to an FPU_REG for the result to be stored             |
+ |  stack      calling function must have set up a C stack frame and         |
+ |             pushed %esi, %edi, and %ebx                                   |
+ |                                                                           |
+ | Needed just for the fpu_reg_round_sqrt entry point:                       |
+ |  %cx  A control word in the same format as the FPU control word.          |
+ | Otherwise, PARAM4 must give such a value.                                 |
+ |                                                                           |
+ |                                                                           |
+ | The significand and its extension are assumed to be exact in the          |
+ | following sense:                                                          |
+ |   If the significand by itself is the exact result then the significand   |
+ |   extension (%edx) must contain 0, otherwise the significand extension    |
+ |   must be non-zero.                                                       |
+ |   If the significand extension is non-zero then the significand is        |
+ |   smaller than the magnitude of the correct exact result by an amount     |
+ |   greater than zero and less than one ls bit of the significand.          |
+ |   The significand extension is only required to have three possible       |
+ |   non-zero values:                                                        |
+ |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
+ |                                 smaller than the magnitude of the true    |
+ |                                 exact result.                             |
+ |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  The code in this module has become quite complex, but it should handle   |
+ |  all of the FPU flags which are set at this stage of the basic arithmetic |
+ |  computations.                                                            |
+ |  There are a few rare cases where the results are not set identically to  |
+ |  a real FPU. These require a bit more thought because at this stage the   |
+ |  results of the code here appear to be more consistent...                 |
+ |  This may be changed in a future version.                                 |
+ +---------------------------------------------------------------------------*/
+
+
+#include "fpu_emu.h"
+#include "exception.h"
+#include "control_w.h"
+
+/* Flags for FPU_bits_lost */
+#define	LOST_DOWN	$1
+#define	LOST_UP		$2
+
+/* Flags for FPU_denormal */
+#define	DENORMAL	$1
+#define	UNMASKED_UNDERFLOW $2
+
+
+#ifndef NON_REENTRANT_FPU
+/*	Make the code re-entrant by putting
+	local storage on the stack: */
+#define FPU_bits_lost	(%esp)
+#define FPU_denormal	1(%esp)
+
+#else
+/*	Not re-entrant, so we can gain speed by putting
+	local storage in a static area: */
+.data
+	.align 4,0
+FPU_bits_lost:
+	.byte	0
+FPU_denormal:
+	.byte	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+.globl fpu_reg_round
+.globl fpu_Arith_exit
+
+/* Entry point when called from C */
+ENTRY(FPU_round)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%edi
+	movl	SIGH(%edi),%eax
+	movl	SIGL(%edi),%ebx
+	movl	PARAM2,%edx
+
+fpu_reg_round:			/* Normal entry point */
+	movl	PARAM4,%ecx
+
+#ifndef NON_REENTRANT_FPU
+	pushl	%ebx		/* adjust the stack pointer */
+#endif /* NON_REENTRANT_FPU */ 
+
+#ifdef PARANOID
+/* Cannot use this here yet */
+/*	orl	%eax,%eax */
+/*	jns	L_entry_bugged */
+#endif /* PARANOID */
+
+	cmpw	EXP_UNDER,EXP(%edi)
+	jle	L_Make_denorm			/* The number is a de-normal */
+
+	movb	$0,FPU_denormal			/* 0 -> not a de-normal */
+
+Denorm_done:
+	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */
+
+	movl	%ecx,%esi
+	andl	CW_PC,%ecx
+	cmpl	PR_64_BITS,%ecx
+	je	LRound_To_64
+
+	cmpl	PR_53_BITS,%ecx
+	je	LRound_To_53
+
+	cmpl	PR_24_BITS,%ecx
+	je	LRound_To_24
+
+#ifdef PECULIAR_486
+/* With the precision control bits set to 01 "(reserved)", a real 80486
+   behaves as if the precision control bits were set to 11 "64 bits" */
+	cmpl	PR_RESERVED_BITS,%ecx
+	je	LRound_To_64
+#ifdef PARANOID
+	jmp	L_bugged_denorm_486
+#endif /* PARANOID */ 
+#else
+#ifdef PARANOID
+	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
+#endif /* PARANOID */ 
+#endif /* PECULIAR_486 */
+
+
+/* Round etc to 24 bit precision */
+LRound_To_24:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_24
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_24
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_24
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_24
+
+#ifdef PARANOID
+	jmp	L_bugged_round24
+#endif /* PARANOID */ 
+
+LUp_24:
+	cmpb	SIGN_POS,PARAM5
+	jne	LCheck_truncate_24	/* If negative then  up==truncate */
+
+	jmp	LCheck_24_round_up
+
+LDown_24:
+	cmpb	SIGN_POS,PARAM5
+	je	LCheck_truncate_24	/* If positive then  down==truncate */
+
+LCheck_24_round_up:
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx
+	orl	%ebx,%ecx
+	orl	%edx,%ecx
+	jnz	LDo_24_round_up
+	jmp	L_Re_normalise
+
+LRound_nearest_24:
+	/* Do rounding of the 24th bit if needed (nearest or even) */
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx
+	cmpl	$0x00000080,%ecx
+	jc	LCheck_truncate_24	/* less than half, no increment needed */
+
+	jne	LGreater_Half_24	/* greater than half, increment needed */
+
+	/* Possibly half, we need to check the ls bits */
+	orl	%ebx,%ebx
+	jnz	LGreater_Half_24	/* greater than half, increment needed */
+
+	orl	%edx,%edx
+	jnz	LGreater_Half_24	/* greater than half, increment needed */
+
+	/* Exactly half, increment only if 24th bit is 1 (round to even) */
+	testl	$0x00000100,%eax
+	jz	LDo_truncate_24
+
+LGreater_Half_24:			/* Rounding: increment at the 24th bit */
+LDo_24_round_up:
+	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
+	xorl	%ebx,%ebx
+	movb	LOST_UP,FPU_bits_lost
+	addl	$0x00000100,%eax
+	jmp	LCheck_Round_Overflow
+
+LCheck_truncate_24:
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx
+	orl	%ebx,%ecx
+	orl	%edx,%ecx
+	jz	L_Re_normalise		/* No truncation needed */
+
+LDo_truncate_24:
+	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
+	xorl	%ebx,%ebx
+	movb	LOST_DOWN,FPU_bits_lost
+	jmp	L_Re_normalise
+
+
+/* Round etc to 53 bit precision */
+LRound_To_53:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_53
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_53
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_53
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_53
+
+#ifdef PARANOID
+	jmp	L_bugged_round53
+#endif /* PARANOID */ 
+
+LUp_53:
+	cmpb	SIGN_POS,PARAM5
+	jne	LCheck_truncate_53	/* If negative then  up==truncate */
+
+	jmp	LCheck_53_round_up
+
+LDown_53:
+	cmpb	SIGN_POS,PARAM5
+	je	LCheck_truncate_53	/* If positive then  down==truncate */
+
+LCheck_53_round_up:
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx
+	orl	%edx,%ecx
+	jnz	LDo_53_round_up
+	jmp	L_Re_normalise
+
+LRound_nearest_53:
+	/* Do rounding of the 53rd bit if needed (nearest or even) */
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx
+	cmpl	$0x00000400,%ecx
+	jc	LCheck_truncate_53	/* less than half, no increment needed */
+
+	jnz	LGreater_Half_53	/* greater than half, increment needed */
+
+	/* Possibly half, we need to check the ls bits */
+	orl	%edx,%edx
+	jnz	LGreater_Half_53	/* greater than half, increment needed */
+
+	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
+	testl	$0x00000800,%ebx
+	jz	LTruncate_53
+
+LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
+LDo_53_round_up:
+	movb	LOST_UP,FPU_bits_lost
+	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
+	addl	$0x00000800,%ebx
+	adcl	$0,%eax
+	jmp	LCheck_Round_Overflow
+
+LCheck_truncate_53:
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx
+	orl	%edx,%ecx
+	jz	L_Re_normalise
+
+LTruncate_53:
+	movb	LOST_DOWN,FPU_bits_lost
+	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
+	jmp	L_Re_normalise
+
+
+/* Round etc to 64 bit precision */
+LRound_To_64:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_64
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_64
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_64
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_64
+
+#ifdef PARANOID
+	jmp	L_bugged_round64
+#endif /* PARANOID */ 
+
+LUp_64:
+	cmpb	SIGN_POS,PARAM5
+	jne	LCheck_truncate_64	/* If negative then  up==truncate */
+
+	orl	%edx,%edx
+	jnz	LDo_64_round_up
+	jmp	L_Re_normalise
+
+LDown_64:
+	cmpb	SIGN_POS,PARAM5
+	je	LCheck_truncate_64	/* If positive then  down==truncate */
+
+	orl	%edx,%edx
+	jnz	LDo_64_round_up
+	jmp	L_Re_normalise
+
+LRound_nearest_64:
+	cmpl	$0x80000000,%edx
+	jc	LCheck_truncate_64
+
+	jne	LDo_64_round_up
+
+	/* Now test for round-to-even */
+	testb	$1,%bl
+	jz	LCheck_truncate_64
+
+LDo_64_round_up:
+	movb	LOST_UP,FPU_bits_lost
+	addl	$1,%ebx
+	adcl	$0,%eax
+
+LCheck_Round_Overflow:
+	jnc	L_Re_normalise
+
+	/* Overflow, adjust the result (significand to 1.0) */
+	rcrl	$1,%eax
+	rcrl	$1,%ebx
+	incw	EXP(%edi)
+	jmp	L_Re_normalise
+
+LCheck_truncate_64:
+	orl	%edx,%edx
+	jz	L_Re_normalise
+
+LTruncate_64:
+	movb	LOST_DOWN,FPU_bits_lost
+
+L_Re_normalise:
+	testb	$0xff,FPU_denormal
+	jnz	Normalise_result
+
+L_Normalised:
+	movl	TAG_Valid,%edx
+
+L_deNormalised:
+	cmpb	LOST_UP,FPU_bits_lost
+	je	L_precision_lost_up
+
+	cmpb	LOST_DOWN,FPU_bits_lost
+	je	L_precision_lost_down
+
+L_no_precision_loss:
+	/* store the result */
+
+L_Store_significand:
+	movl	%eax,SIGH(%edi)
+	movl	%ebx,SIGL(%edi)
+
+	cmpw	EXP_OVER,EXP(%edi)
+	jge	L_overflow
+
+	movl	%edx,%eax
+
+	/* Convert the exponent to 80x87 form. */
+	addw	EXTENDED_Ebias,EXP(%edi)
+	andw	$0x7fff,EXP(%edi)
+
+fpu_reg_round_signed_special_exit:
+
+	cmpb	SIGN_POS,PARAM5
+	je	fpu_reg_round_special_exit
+
+	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */
+
+fpu_reg_round_special_exit:
+
+#ifndef NON_REENTRANT_FPU
+	popl	%ebx		/* adjust the stack pointer */
+#endif /* NON_REENTRANT_FPU */ 
+
+fpu_Arith_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+
+
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * round-up.
+ */
+L_precision_lost_up:
+	push	%edx
+	push	%eax
+	call	set_precision_flag_up
+	popl	%eax
+	popl	%edx
+	jmp	L_no_precision_loss
+
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * truncation.
+ */
+L_precision_lost_down:
+	push	%edx
+	push	%eax
+	call	set_precision_flag_down
+	popl	%eax
+	popl	%edx
+	jmp	L_no_precision_loss
+
+
+/*
+ * The number is a denormal (which might get rounded up to a normal)
+ * Shift the number right the required number of bits, which will
+ * have to be undone later...
+ */
+L_Make_denorm:
+	/* The action to be taken depends upon whether the underflow
+	   exception is masked */
+	testb	CW_Underflow,%cl		/* Underflow mask. */
+	jz	Unmasked_underflow		/* Do not make a denormal. */
+
+	movb	DENORMAL,FPU_denormal
+
+	pushl	%ecx		/* Save */
+	movw	EXP_UNDER+1,%cx
+	subw	EXP(%edi),%cx
+
+	cmpw	$64,%cx	/* shrd only works for 0..31 bits */
+	jnc	Denorm_shift_more_than_63
+
+	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
+	jnc	Denorm_shift_more_than_32
+
+/*
+ * We got here without jumps by assuming that the most common requirement
+ *   is for a small de-normalising shift.
+ * Shift by [1..31] bits
+ */
+	addw	%cx,EXP(%edi)
+	orl	%edx,%edx	/* extension */
+	setne	%ch		/* Save whether %edx is non-zero */
+	xorl	%edx,%edx
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	orb	%ch,%dl
+	popl	%ecx
+	jmp	Denorm_done
+
+/* Shift by [32..63] bits */
+Denorm_shift_more_than_32:
+	addw	%cx,EXP(%edi)
+	subb	$32,%cl
+	orl	%edx,%edx
+	setne	%ch
+	orb	%ch,%bl
+	xorl	%edx,%edx
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	orl	%edx,%edx		/* test these 32 bits */
+	setne	%cl
+	orb	%ch,%bl
+	orb	%cl,%bl
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	popl	%ecx
+	jmp	Denorm_done
+
+/* Shift by [64..) bits */
+Denorm_shift_more_than_63:
+	cmpw	$64,%cx
+	jne	Denorm_shift_more_than_64
+
+/* Exactly 64 bit shift */
+	addw	%cx,EXP(%edi)
+	xorl	%ecx,%ecx
+	orl	%edx,%edx
+	setne	%cl
+	orl	%ebx,%ebx
+	setne	%ch
+	orb	%ch,%cl
+	orb	%cl,%al
+	movl	%eax,%edx
+	xorl	%eax,%eax
+	xorl	%ebx,%ebx
+	popl	%ecx
+	jmp	Denorm_done
+
+Denorm_shift_more_than_64:
+	movw	EXP_UNDER+1,EXP(%edi)
+/* This is easy, %eax must be non-zero, so.. */
+	movl	$1,%edx
+	xorl	%eax,%eax
+	xorl	%ebx,%ebx
+	popl	%ecx
+	jmp	Denorm_done
+
+
+Unmasked_underflow:
+	movb	UNMASKED_UNDERFLOW,FPU_denormal
+	jmp	Denorm_done
+
+
+/* Undo the de-normalisation. */
+Normalise_result:
+	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
+	je	Signal_underflow
+
+/* The number must be a denormal if we got here. */
+#ifdef PARANOID
+	/* But check it... just in case. */
+	cmpw	EXP_UNDER+1,EXP(%edi)
+	jne	L_norm_bugged
+#endif /* PARANOID */
+
+#ifdef PECULIAR_486
+	/*
+	 * This implements a special feature of 80486 behaviour.
+	 * Underflow will be signalled even if the number is
+	 * not a denormal after rounding.
+	 * This difference occurs only for masked underflow, and not
+	 * in the unmasked case.
+	 * Actual 80486 behaviour differs from this in some circumstances.
+	 */
+	orl	%eax,%eax		/* ms bits */
+	js	LPseudoDenormal		/* Will be masked underflow */
+#else
+	orl	%eax,%eax		/* ms bits */
+	js	L_Normalised		/* No longer a denormal */
+#endif /* PECULIAR_486 */ 
+
+	jnz	LDenormal_adj_exponent
+
+	orl	%ebx,%ebx
+	jz	L_underflow_to_zero	/* The contents are zero */
+
+LDenormal_adj_exponent:
+	decw	EXP(%edi)
+
+LPseudoDenormal:
+	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
+	movl	TAG_Special,%edx
+	jz	L_deNormalised
+
+	/* There must be a masked underflow */
+	push	%eax
+	pushl	EX_Underflow
+	call	EXCEPTION
+	popl	%eax
+	popl	%eax
+	movl	TAG_Special,%edx
+	jmp	L_deNormalised
+
+
+/*
+ * The operations resulted in a number too small to represent.
+ * Masked response.
+ */
+L_underflow_to_zero:
+	push	%eax
+	call	set_precision_flag_down
+	popl	%eax
+
+	push	%eax
+	pushl	EX_Underflow
+	call	EXCEPTION
+	popl	%eax
+	popl	%eax
+
+/* Reduce the exponent to EXP_UNDER */
+	movw	EXP_UNDER,EXP(%edi)
+	movl	TAG_Zero,%edx
+	jmp	L_Store_significand
+
+
+/* The operations resulted in a number too large to represent. */
+L_overflow:
+	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
+	push	%edi
+	call	arith_overflow
+	pop	%edi
+	jmp	fpu_reg_round_signed_special_exit
+
+
+Signal_underflow:
+	/* The number may have been changed to a non-denormal */
+	/* by the rounding operations. */
+	cmpw	EXP_UNDER,EXP(%edi)
+	jle	Do_unmasked_underflow
+
+	jmp	L_Normalised
+
+Do_unmasked_underflow:
+	/* Increase the exponent by the magic number */
+	addw	$(3*(1<<13)),EXP(%edi)
+	push	%eax
+	pushl	EX_Underflow
+	call	EXCEPTION
+	popl	%eax
+	popl	%eax
+	jmp	L_Normalised
+
+
+#ifdef PARANOID
+#ifdef PECULIAR_486
+L_bugged_denorm_486:
+	pushl	EX_INTERNAL|0x236
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+#else
+L_bugged_denorm:
+	pushl	EX_INTERNAL|0x230
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+#endif /* PECULIAR_486 */ 
+
+L_bugged_round24:
+	pushl	EX_INTERNAL|0x231
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+
+L_bugged_round53:
+	pushl	EX_INTERNAL|0x232
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+
+L_bugged_round64:
+	pushl	EX_INTERNAL|0x233
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+
+L_norm_bugged:
+	pushl	EX_INTERNAL|0x234
+	call	EXCEPTION
+	popl	%ebx
+	jmp	L_exception_exit
+
+L_entry_bugged:
+	pushl	EX_INTERNAL|0x235
+	call	EXCEPTION
+	popl	%ebx
+L_exception_exit:
+	mov	$-1,%eax
+	jmp	fpu_reg_round_special_exit
+#endif /* PARANOID */ 
diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S
new file mode 100644
index 00000000000..47c4c2434d8
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_add.S
@@ -0,0 +1,167 @@
+	.file	"reg_u_add.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_add.S                                                              |
+ |                                                                           |
+ | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the  |
+ |   result in a destination FPU_REG.                                        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   int  FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ,             |
+ |                                                int control_w)             |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*
+ |    Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
+ |    Takes two valid reg f.p. numbers (TAG_Valid), which are
+ |    treated as unsigned numbers,
+ |    and returns their sum as a TAG_Valid or TAG_Special f.p. number.
+ |    The returned number is normalized.
+ |    Basic checks are performed if PARANOID is defined.
+ */
+
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+
+.text
+ENTRY(FPU_u_add)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi		/* source 1 */
+	movl	PARAM2,%edi		/* source 2 */
+
+	movl	PARAM6,%ecx
+	movl	%ecx,%edx
+	subl	PARAM7,%ecx			/* exp1 - exp2 */
+	jge	L_arg1_larger
+
+	/* num1 is smaller */
+	movl	SIGL(%esi),%ebx
+	movl	SIGH(%esi),%eax
+
+	movl	%edi,%esi
+	movl	PARAM7,%edx
+	negw	%cx
+	jmp	L_accum_loaded
+
+L_arg1_larger:
+	/* num1 has larger or equal exponent */
+	movl	SIGL(%edi),%ebx
+	movl	SIGH(%edi),%eax
+
+L_accum_loaded:
+	movl	PARAM3,%edi		/* destination */
+	movw	%dx,EXP(%edi)		/* Copy exponent to destination */
+
+	xorl	%edx,%edx		/* clear the extension */
+
+#ifdef PARANOID
+	testl	$0x80000000,%eax
+	je	L_bugged
+
+	testl	$0x80000000,SIGH(%esi)
+	je	L_bugged
+#endif /* PARANOID */
+
+/* The number to be shifted is in %eax:%ebx:%edx */
+	cmpw	$32,%cx		/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	jmp	L_shift_done
+
+L_more_than_31:
+	cmpw	$64,%cx
+	jnc	L_more_than_63
+
+	subb	$32,%cl
+	jz	L_exactly_32
+
+	shrd	%cl,%eax,%edx
+	shr	%cl,%eax
+	orl	%ebx,%ebx
+	jz	L_more_31_no_low	/* none of the lowest bits is set */
+
+	orl	$1,%edx			/* record the fact in the extension */
+
+L_more_31_no_low:
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_exactly_32:
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_more_than_63:
+	cmpw	$65,%cx
+	jnc	L_more_than_64
+
+	movl	%eax,%edx
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_more_than_64:
+	movl	$1,%edx		/* The shifted nr always at least one '1' */
+
+L_more_63_no_low:
+	xorl	%ebx,%ebx
+	xorl	%eax,%eax
+
+L_shift_done:
+	/* Now do the addition */
+	addl	SIGL(%esi),%ebx
+	adcl	SIGH(%esi),%eax
+	jnc	L_round_the_result
+
+	/* Overflow, adjust the result */
+	rcrl	$1,%eax
+	rcrl	$1,%ebx
+	rcrl	$1,%edx
+	jnc	L_no_bit_lost
+
+	orl	$1,%edx
+
+L_no_bit_lost:
+	incw	EXP(%edi)
+
+L_round_the_result:
+	jmp	fpu_reg_round	/* Round the result */
+
+
+
+#ifdef PARANOID
+/* If we ever get here then we have problems! */
+L_bugged:
+	pushl	EX_INTERNAL|0x201
+	call	EXCEPTION
+	pop	%ebx
+	movl	$-1,%eax
+	jmp	L_exit
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S
new file mode 100644
index 00000000000..cc00654b6f9
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_div.S
@@ -0,0 +1,471 @@
+	.file	"reg_u_div.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_div.S                                                              |
+ |                                                                           |
+ | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Call from C as:                                                           |
+ |    int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest,                   |
+ |                unsigned int control_word, char *sign)                     |
+ |                                                                           |
+ |  Does not compute the destination exponent, but does adjust it.           |
+ |                                                                           |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+
+
+/* #define	dSIGL(x)	(x) */
+/* #define	dSIGH(x)	4(x) */
+
+
+#ifndef NON_REENTRANT_FPU
+/*
+	Local storage on the stack:
+	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+	Overflow flag:	ovfl_flag
+ */
+#define FPU_accum_3	-4(%ebp)
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+#define FPU_result_1	-20(%ebp)
+#define FPU_result_2	-24(%ebp)
+#define FPU_ovfl_flag	-28(%ebp)
+
+#else
+.data
+/*
+	Local storage in a static area:
+	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+	Overflow flag:	ovfl_flag
+ */
+	.align 4,0
+FPU_accum_3:
+	.long	0
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+FPU_result_1:
+	.long	0
+FPU_result_2:
+	.long	0
+FPU_ovfl_flag:
+	.byte	0
+#endif /* NON_REENTRANT_FPU */
+
+#define REGA	PARAM1
+#define REGB	PARAM2
+#define DEST	PARAM3
+
+.text
+ENTRY(FPU_u_div)
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp
+#endif /* NON_REENTRANT_FPU */
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	REGA,%esi
+	movl	REGB,%ebx
+	movl	DEST,%edi
+
+	movswl	EXP(%esi),%edx
+	movswl	EXP(%ebx),%eax
+	subl	%eax,%edx
+	addl	EXP_BIAS,%edx
+
+	/* A denormal and a large number can cause an exponent underflow */
+	cmpl	EXP_WAY_UNDER,%edx
+	jg	xExp_not_underflow
+
+	/* Set to a really low value allow correct handling */
+	movl	EXP_WAY_UNDER,%edx
+
+xExp_not_underflow:
+
+	movw    %dx,EXP(%edi)
+
+#ifdef PARANOID
+/*	testl	$0x80000000, SIGH(%esi)	// Dividend */
+/*	je	L_bugged */
+	testl	$0x80000000, SIGH(%ebx)	/* Divisor */
+	je	L_bugged
+#endif /* PARANOID */ 
+
+/* Check if the divisor can be treated as having just 32 bits */
+	cmpl	$0,SIGL(%ebx)
+	jnz	L_Full_Division	/* Can't do a quick divide */
+
+/* We should be able to zip through the division here */
+	movl	SIGH(%ebx),%ecx	/* The divisor */
+	movl	SIGH(%esi),%edx	/* Dividend */
+	movl	SIGL(%esi),%eax	/* Dividend */
+
+	cmpl	%ecx,%edx
+	setaeb	FPU_ovfl_flag	/* Keep a record */
+	jb	L_no_adjust
+
+	subl	%ecx,%edx	/* Prevent the overflow */
+
+L_no_adjust:
+	/* Divide the 64 bit number by the 32 bit denominator */
+	divl	%ecx
+	movl	%eax,FPU_result_2
+
+	/* Work on the remainder of the first division */
+	xorl	%eax,%eax
+	divl	%ecx
+	movl	%eax,FPU_result_1
+
+	/* Work on the remainder of the 64 bit division */
+	xorl	%eax,%eax
+	divl	%ecx
+
+	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
+	je	L_no_overflow
+
+	/* Do the shifting here */
+	/* increase the exponent */
+	incw	EXP(%edi)
+
+	/* shift the mantissa right one bit */
+	stc			/* To set the ms bit */
+	rcrl	FPU_result_2
+	rcrl	FPU_result_1
+	rcrl	%eax
+
+L_no_overflow:
+	jmp	LRound_precision	/* Do the rounding as required */
+
+
+/*---------------------------------------------------------------------------+
+ |  Divide:   Return  arg1/arg2 to arg3.                                     |
+ |                                                                           |
+ |  This routine does not use the exponents of arg1 and arg2, but does       |
+ |  adjust the exponent of arg3.                                             |
+ |                                                                           |
+ |  The maximum returned value is (ignoring exponents)                       |
+ |               .ffffffff ffffffff                                          |
+ |               ------------------  =  1.ffffffff fffffffe                  |
+ |               .80000000 00000000                                          |
+ | and the minimum is                                                        |
+ |               .80000000 00000000                                          |
+ |               ------------------  =  .80000000 00000001   (rounded)       |
+ |               .ffffffff ffffffff                                          |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+L_Full_Division:
+	/* Save extended dividend in local register */
+	movl	SIGL(%esi),%eax
+	movl	%eax,FPU_accum_2
+	movl	SIGH(%esi),%eax
+	movl	%eax,FPU_accum_3
+	xorl	%eax,%eax
+	movl	%eax,FPU_accum_1	/* zero the extension */
+	movl	%eax,FPU_accum_0	/* zero the extension */
+
+	movl	SIGL(%esi),%eax	/* Get the current num */
+	movl	SIGH(%esi),%edx
+
+/*----------------------------------------------------------------------*/
+/* Initialization done.
+   Do the first 32 bits. */
+
+	movb	$0,FPU_ovfl_flag
+	cmpl	SIGH(%ebx),%edx	/* Test for imminent overflow */
+	jb	LLess_than_1
+	ja	LGreater_than_1
+
+	cmpl	SIGL(%ebx),%eax
+	jb	LLess_than_1
+
+LGreater_than_1:
+/* The dividend is greater or equal, would cause overflow */
+	setaeb	FPU_ovfl_flag		/* Keep a record */
+
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx	/* Prevent the overflow */
+	movl	%eax,FPU_accum_2
+	movl	%edx,FPU_accum_3
+
+LLess_than_1:
+/* At this point, we have a dividend < divisor, with a record of
+   adjustment in FPU_ovfl_flag */
+
+	/* We will divide by a number which is too large */
+	movl	SIGH(%ebx),%ecx
+	addl	$1,%ecx
+	jnc	LFirst_div_not_1
+
+	/* here we need to divide by 100000000h,
+	   i.e., no division at all.. */
+	mov	%edx,%eax
+	jmp	LFirst_div_done
+
+LFirst_div_not_1:
+	divl	%ecx		/* Divide the numerator by the augmented
+				   denom ms dw */
+
+LFirst_div_done:
+	movl	%eax,FPU_result_2	/* Put the result in the answer */
+
+	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_3
+
+	movl	FPU_result_2,%eax	/* Get the result back */
+	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+	sbbl	$0,FPU_accum_3
+	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
+
+#ifdef PARANOID
+	jb	L_bugged_1
+#endif /* PARANOID */ 
+
+	/* need to subtract another once of the denom */
+	incl	FPU_result_2	/* Correct the answer */
+
+	movl	SIGL(%ebx),%eax
+	movl	SIGH(%ebx),%edx
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	sbbl	$0,FPU_accum_3
+	jne	L_bugged_1	/* Must check for non-zero result here */
+#endif /* PARANOID */ 
+
+/*----------------------------------------------------------------------*/
+/* Half of the main problem is done, there is just a reduced numerator
+   to handle now.
+   Work with the second 32 bits, FPU_accum_0 not used from now on */
+LDo_2nd_32_bits:
+	movl	FPU_accum_2,%edx	/* get the reduced num */
+	movl	FPU_accum_1,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	SIGH(%ebx),%edx
+	jb	LDo_2nd_div
+	ja	LPrevent_2nd_overflow
+
+	cmpl	SIGL(%ebx),%eax
+	jb	LDo_2nd_div
+
+LPrevent_2nd_overflow:
+/* The numerator is greater or equal, would cause overflow */
+	/* prevent overflow */
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx
+	movl	%edx,FPU_accum_2
+	movl	%eax,FPU_accum_1
+
+	incl	FPU_result_2	/* Reflect the subtraction in the answer */
+
+#ifdef PARANOID
+	je	L_bugged_2	/* Can't bump the result to 1.0 */
+#endif /* PARANOID */ 
+
+LDo_2nd_div:
+	cmpl	$0,%ecx		/* augmented denom msw */
+	jnz	LSecond_div_not_1
+
+	/* %ecx == 0, we are dividing by 1.0 */
+	mov	%edx,%eax
+	jmp	LSecond_div_done
+
+LSecond_div_not_1:
+	divl	%ecx		/* Divide the numerator by the denom ms dw */
+
+LSecond_div_done:
+	movl	%eax,FPU_result_1	/* Put the result in the answer */
+
+	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */ 
+
+	movl	FPU_result_1,%eax	/* Get the result back */
+	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */ 
+
+	jz	LDo_3rd_32_bits
+
+#ifdef PARANOID
+	cmpl	$1,FPU_accum_2
+	jne	L_bugged_2
+#endif /* PARANOID */
+
+	/* need to subtract another once of the denom */
+	movl	SIGL(%ebx),%eax
+	movl	SIGH(%ebx),%edx
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+	jne	L_bugged_2
+#endif /* PARANOID */ 
+
+	addl	$1,FPU_result_1	/* Correct the answer */
+	adcl	$0,FPU_result_2
+
+#ifdef PARANOID
+	jc	L_bugged_2	/* Must check for non-zero result here */
+#endif /* PARANOID */
+
+/*----------------------------------------------------------------------*/
+/* The division is essentially finished here, we just need to perform
+   tidying operations.
+   Deal with the 3rd 32 bits */
+LDo_3rd_32_bits:
+	movl	FPU_accum_1,%edx		/* get the reduced num */
+	movl	FPU_accum_0,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	SIGH(%ebx),%edx	/* denom */
+	jb	LRound_prep
+	ja	LPrevent_3rd_overflow
+
+	cmpl	SIGL(%ebx),%eax	/* denom */
+	jb	LRound_prep
+
+LPrevent_3rd_overflow:
+	/* prevent overflow */
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx
+	movl	%edx,FPU_accum_1
+	movl	%eax,FPU_accum_0
+
+	addl	$1,FPU_result_1	/* Reflect the subtraction in the answer */
+	adcl	$0,FPU_result_2
+	jne	LRound_prep
+	jnc	LRound_prep
+
+	/* This is a tricky spot, there is an overflow of the answer */
+	movb	$255,FPU_ovfl_flag		/* Overflow -> 1.000 */
+
+LRound_prep:
+/*
+ * Prepare for rounding.
+ * To test for rounding, we just need to compare 2*accum with the
+ * denom.
+ */
+	movl	FPU_accum_0,%ecx
+	movl	FPU_accum_1,%edx
+	movl	%ecx,%eax
+	orl	%edx,%eax
+	jz	LRound_ovfl		/* The accumulator contains zero. */
+
+	/* Multiply by 2 */
+	clc
+	rcll	$1,%ecx
+	rcll	$1,%edx
+	jc	LRound_large		/* No need to compare, denom smaller */
+
+	subl	SIGL(%ebx),%ecx
+	sbbl	SIGH(%ebx),%edx
+	jnc	LRound_not_small
+
+	movl	$0x70000000,%eax	/* Denom was larger */
+	jmp	LRound_ovfl
+
+LRound_not_small:
+	jnz	LRound_large
+
+	movl	$0x80000000,%eax	/* Remainder was exactly 1/2 denom */
+	jmp	LRound_ovfl
+
+LRound_large:
+	movl	$0xff000000,%eax	/* Denom was smaller */
+
+LRound_ovfl:
+/* We are now ready to deal with rounding, but first we must get
+   the bits properly aligned */
+	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
+	je	LRound_precision
+
+	incw	EXP(%edi)
+
+	/* shift the mantissa right one bit */
+	stc			/* Will set the ms bit */
+	rcrl	FPU_result_2
+	rcrl	FPU_result_1
+	rcrl	%eax
+
+/* Round the result as required */
+LRound_precision:
+	decw	EXP(%edi)	/* binary point between 1st & 2nd bits */
+
+	movl	%eax,%edx
+	movl	FPU_result_1,%ebx
+	movl	FPU_result_2,%eax
+	jmp	fpu_reg_round
+
+
+#ifdef PARANOID
+/* The logic is wrong if we got here */
+L_bugged:
+	pushl	EX_INTERNAL|0x202
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_1:
+	pushl	EX_INTERNAL|0x203
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x204
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_exit:
+	movl	$-1,%eax
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	leave
+	ret
+#endif /* PARANOID */ 
diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S
new file mode 100644
index 00000000000..973f12af97d
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_mul.S
@@ -0,0 +1,148 @@
+	.file	"reg_u_mul.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_mul.S                                                              |
+ |                                                                           |
+ | Core multiplication routine                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |   Basic multiplication routine.                                           |
+ |   Does not check the resulting exponent for overflow/underflow            |
+ |                                                                           |
+ |   FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw);         |
+ |                                                                           |
+ |   Internal working is at approx 128 bits.                                 |
+ |   Result is rounded to nearest 53 or 64 bits, using "nearest or even".    |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+
+
+
+#ifndef NON_REENTRANT_FPU
+/*  Local storage on the stack: */
+#define FPU_accum_0	-4(%ebp)	/* ms word */
+#define FPU_accum_1	-8(%ebp)
+
+#else
+/*  Local storage in a static area: */
+.data
+	.align 4,0
+FPU_accum_0:
+	.long	0
+FPU_accum_1:
+	.long	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+ENTRY(FPU_u_mul)
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$8,%esp
+#endif /* NON_REENTRANT_FPU */ 
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi
+	movl	PARAM2,%edi
+
+#ifdef PARANOID
+	testl	$0x80000000,SIGH(%esi)
+	jz	L_bugged
+	testl	$0x80000000,SIGH(%edi)
+	jz	L_bugged
+#endif /* PARANOID */
+
+	xorl	%ecx,%ecx
+	xorl	%ebx,%ebx
+
+	movl	SIGL(%esi),%eax
+	mull	SIGL(%edi)
+	movl	%eax,FPU_accum_0
+	movl	%edx,FPU_accum_1
+
+	movl	SIGL(%esi),%eax
+	mull	SIGH(%edi)
+	addl	%eax,FPU_accum_1
+	adcl	%edx,%ebx
+/*	adcl	$0,%ecx		// overflow here is not possible */
+
+	movl	SIGH(%esi),%eax
+	mull	SIGL(%edi)
+	addl	%eax,FPU_accum_1
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+
+	movl	SIGH(%esi),%eax
+	mull	SIGH(%edi)
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+
+	/* Get the sum of the exponents. */
+	movl	PARAM6,%eax
+	subl	EXP_BIAS-1,%eax
+
+	/* Two denormals can cause an exponent underflow */
+	cmpl	EXP_WAY_UNDER,%eax
+	jg	Exp_not_underflow
+
+	/* Set to a really low value allow correct handling */
+	movl	EXP_WAY_UNDER,%eax
+
+Exp_not_underflow:
+
+/*  Have now finished with the sources */
+	movl	PARAM3,%edi	/* Point to the destination */
+	movw	%ax,EXP(%edi)
+
+/*  Now make sure that the result is normalized */
+	testl	$0x80000000,%ecx
+	jnz	LResult_Normalised
+
+	/* Normalize by shifting left one bit */
+	shll	$1,FPU_accum_0
+	rcll	$1,FPU_accum_1
+	rcll	$1,%ebx
+	rcll	$1,%ecx
+	decw	EXP(%edi)
+
+LResult_Normalised:
+	movl	FPU_accum_0,%eax
+	movl	FPU_accum_1,%edx
+	orl	%eax,%eax
+	jz	L_extent_zero
+
+	orl	$1,%edx
+
+L_extent_zero:
+	movl	%ecx,%eax
+	jmp	fpu_reg_round
+
+
+#ifdef PARANOID
+L_bugged:
+	pushl	EX_INTERNAL|0x205
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+#endif /* PARANOID */ 
+
diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S
new file mode 100644
index 00000000000..1b6c24801d2
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_sub.S
@@ -0,0 +1,272 @@
+	.file	"reg_u_sub.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_sub.S                                                              |
+ |                                                                           |
+ | Core floating point subtraction routine.                                  |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@suburbia.net                              |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |    int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ,             |
+ |                                                int control_w)             |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*
+ |    Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
+ |    Takes two valid reg f.p. numbers (TAG_Valid), which are
+ |    treated as unsigned numbers,
+ |    and returns their difference as a TAG_Valid or TAG_Zero f.p.
+ |    number.
+ |    The first number (arg1) must be the larger.
+ |    The returned number is normalized.
+ |    Basic checks are performed if PARANOID is defined.
+ */
+
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+
+.text
+ENTRY(FPU_u_sub)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* source 1 */
+	movl	PARAM2,%edi	/* source 2 */
+	
+	movl	PARAM6,%ecx
+	subl	PARAM7,%ecx	/* exp1 - exp2 */
+
+#ifdef PARANOID
+	/* source 2 is always smaller than source 1 */
+	js	L_bugged_1
+
+	testl	$0x80000000,SIGH(%edi)	/* The args are assumed to be be normalized */
+	je	L_bugged_2
+
+	testl	$0x80000000,SIGH(%esi)
+	je	L_bugged_2
+#endif /* PARANOID */
+
+/*--------------------------------------+
+ |	Form a register holding the     |
+ |	smaller number                  |
+ +--------------------------------------*/
+	movl	SIGH(%edi),%eax	/* register ms word */
+	movl	SIGL(%edi),%ebx	/* register ls word */
+
+	movl	PARAM3,%edi	/* destination */
+	movl	PARAM6,%edx
+	movw	%dx,EXP(%edi)	/* Copy exponent to destination */
+
+	xorl	%edx,%edx	/* register extension */
+
+/*--------------------------------------+
+ |	Shift the temporary register	|
+ |      right the required number of	|
+ |	places.				|
+ +--------------------------------------*/
+
+	cmpw	$32,%cx		/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	jmp	L_shift_done
+
+L_more_than_31:
+	cmpw	$64,%cx
+	jnc	L_more_than_63
+
+	subb	$32,%cl
+	jz	L_exactly_32
+
+	shrd	%cl,%eax,%edx
+	shr	%cl,%eax
+	orl	%ebx,%ebx
+	jz	L_more_31_no_low	/* none of the lowest bits is set */
+
+	orl	$1,%edx			/* record the fact in the extension */
+
+L_more_31_no_low:
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_exactly_32:
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_more_than_63:
+	cmpw	$65,%cx
+	jnc	L_more_than_64
+
+	/* Shift right by 64 bits */
+	movl	%eax,%edx
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_more_than_64:
+	jne	L_more_than_65
+
+	/* Shift right by 65 bits */
+	/* Carry is clear if we get here */
+	movl	%eax,%edx
+	rcrl	%edx
+	jnc	L_shift_65_nc
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_shift_65_nc:
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_more_than_65:
+	movl	$1,%edx		/* The shifted nr always at least one '1' */
+
+L_more_63_no_low:
+	xorl	%ebx,%ebx
+	xorl	%eax,%eax
+
+L_shift_done:
+L_subtr:
+/*------------------------------+
+ |	Do the subtraction	|
+ +------------------------------*/
+	xorl	%ecx,%ecx
+	subl	%edx,%ecx
+	movl	%ecx,%edx
+	movl	SIGL(%esi),%ecx
+	sbbl	%ebx,%ecx
+	movl	%ecx,%ebx
+	movl	SIGH(%esi),%ecx
+	sbbl	%eax,%ecx
+	movl	%ecx,%eax
+
+#ifdef PARANOID
+	/* We can never get a borrow */
+	jc	L_bugged
+#endif /* PARANOID */
+
+/*--------------------------------------+
+ |	Normalize the result		|
+ +--------------------------------------*/
+	testl	$0x80000000,%eax
+	jnz	L_round		/* no shifting needed */
+
+	orl	%eax,%eax
+	jnz	L_shift_1	/* shift left 1 - 31 bits */
+
+	orl	%ebx,%ebx
+	jnz	L_shift_32	/* shift left 32 - 63 bits */
+
+/*
+ *	 A rare case, the only one which is non-zero if we got here
+ *         is:           1000000 .... 0000
+ *                      -0111111 .... 1111 1
+ *                       -------------------- 
+ *                       0000000 .... 0000 1 
+ */
+
+	cmpl	$0x80000000,%edx
+	jnz	L_must_be_zero
+
+	/* Shift left 64 bits */
+	subw	$64,EXP(%edi)
+	xchg	%edx,%eax
+	jmp	fpu_reg_round
+
+L_must_be_zero:
+#ifdef PARANOID
+	orl	%edx,%edx
+	jnz	L_bugged_3
+#endif /* PARANOID */ 
+
+	/* The result is zero */
+	movw	$0,EXP(%edi)		/* exponent */
+	movl	$0,SIGL(%edi)
+	movl	$0,SIGH(%edi)
+	movl	TAG_Zero,%eax
+	jmp	L_exit
+
+L_shift_32:
+	movl	%ebx,%eax
+	movl	%edx,%ebx
+	movl	$0,%edx
+	subw	$32,EXP(%edi)	/* Can get underflow here */
+
+/* We need to shift left by 1 - 31 bits */
+L_shift_1:
+	bsrl	%eax,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx
+	shld	%cl,%ebx,%eax
+	shld	%cl,%edx,%ebx
+	shl	%cl,%edx
+	subw	%cx,EXP(%edi)	/* Can get underflow here */
+
+L_round:
+	jmp	fpu_reg_round	/* Round the result */
+
+
+#ifdef PARANOID
+L_bugged_1:
+	pushl	EX_INTERNAL|0x206
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x209
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged_3:
+	pushl	EX_INTERNAL|0x210
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged_4:
+	pushl	EX_INTERNAL|0x211
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged:
+	pushl	EX_INTERNAL|0x212
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_error_exit:
+	movl	$-1,%eax
+
+#endif /* PARANOID */
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S
new file mode 100644
index 00000000000..bbe0e87718e
--- /dev/null
+++ b/arch/i386/math-emu/round_Xsig.S
@@ -0,0 +1,141 @@
+/*---------------------------------------------------------------------------+
+ |  round_Xsig.S                                                             |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1995                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Normalize and round a 12 byte quantity.                                   |
+ | Call from C as:                                                           |
+ |   int round_Xsig(Xsig *n)                                                 |
+ |                                                                           |
+ | Normalize a 12 byte quantity.                                             |
+ | Call from C as:                                                           |
+ |   int norm_Xsig(Xsig *n)                                                  |
+ |                                                                           |
+ | Each function returns the size of the shift (nr of bits).                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"round_Xsig.S"
+
+#include "fpu_emu.h"
+
+
+.text
+ENTRY(round_Xsig)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx		/* Reserve some space */
+	pushl	%ebx
+	pushl	%esi
+
+	movl	PARAM1,%esi
+
+	movl	8(%esi),%edx
+	movl	4(%esi),%ebx
+	movl	(%esi),%eax
+
+	movl	$0,-4(%ebp)
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_round		/* Already normalized */
+	jnz	L_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	movl	$-32,-4(%ebp)
+
+/* We need to shift left by 1 - 31 bits */
+L_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx
+	subl	%ecx,-4(%ebp)
+	shld	%cl,%ebx,%edx
+	shld	%cl,%eax,%ebx
+	shl	%cl,%eax
+
+L_round:
+	testl	$0x80000000,%eax
+	jz	L_exit
+
+	addl	$1,%ebx
+	adcl	$0,%edx
+	jnz	L_exit
+
+	movl	$0x80000000,%edx
+	incl	-4(%ebp)
+
+L_exit:
+	movl	%edx,8(%esi)
+	movl	%ebx,4(%esi)
+	movl	%eax,(%esi)
+
+	movl	-4(%ebp),%eax
+
+	popl	%esi
+	popl	%ebx
+	leave
+	ret
+
+
+
+
+ENTRY(norm_Xsig)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx		/* Reserve some space */
+	pushl	%ebx
+	pushl	%esi
+
+	movl	PARAM1,%esi
+
+	movl	8(%esi),%edx
+	movl	4(%esi),%ebx
+	movl	(%esi),%eax
+
+	movl	$0,-4(%ebp)
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_n_exit		/* Already normalized */
+	jnz	L_n_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	movl	$-32,-4(%ebp)
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_n_exit	/* Normalized now */
+	jnz	L_n_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	addl	$-32,-4(%ebp)
+	jmp	L_n_exit	/* Might not be normalized,
+	                           but shift no more. */
+
+/* We need to shift left by 1 - 31 bits */
+L_n_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx
+	subl	%ecx,-4(%ebp)
+	shld	%cl,%ebx,%edx
+	shld	%cl,%eax,%ebx
+	shl	%cl,%eax
+
+L_n_exit:
+	movl	%edx,8(%esi)
+	movl	%ebx,4(%esi)
+	movl	%eax,(%esi)
+
+	movl	-4(%ebp),%eax
+
+	popl	%esi
+	popl	%ebx
+	leave
+	ret
+
diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S
new file mode 100644
index 00000000000..31cdd118e91
--- /dev/null
+++ b/arch/i386/math-emu/shr_Xsig.S
@@ -0,0 +1,87 @@
+	.file	"shr_Xsig.S"
+/*---------------------------------------------------------------------------+
+ |  shr_Xsig.S                                                               |
+ |                                                                           |
+ | 12 byte right shift function                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1994,1995                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void shr_Xsig(Xsig *arg, unsigned nr)                                   |
+ |                                                                           |
+ |   Extended shift right function.                                          |
+ |   Fastest for small shifts.                                               |
+ |   Shifts the 12 byte quantity pointed to by the first arg (arg)           |
+ |   right by the number of bits specified by the second arg (nr).           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+
+.text
+ENTRY(shr_Xsig)
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	movl	PARAM2,%ecx
+	movl	PARAM1,%esi
+	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	pushl	%ebx
+	movl	(%esi),%eax	/* lsl */
+	movl	4(%esi),%ebx	/* midl */
+	movl	8(%esi),%edx	/* msl */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%edx,8(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+L_more_than_31:
+	cmpl	$64,%ecx
+	jnc	L_more_than_63
+
+	subb	$32,%cl
+	movl	4(%esi),%eax	/* midl */
+	movl	8(%esi),%edx	/* msl */
+	shrd	%cl,%edx,%eax
+	shr	%cl,%edx
+	movl	%eax,(%esi)
+	movl	%edx,4(%esi)
+	movl	$0,8(%esi)
+	popl	%esi
+	leave
+	ret
+
+L_more_than_63:
+	cmpl	$96,%ecx
+	jnc	L_more_than_95
+
+	subb	$64,%cl
+	movl	8(%esi),%eax	/* msl */
+	shr	%cl,%eax
+	xorl	%edx,%edx
+	movl	%eax,(%esi)
+	movl	%edx,4(%esi)
+	movl	%edx,8(%esi)
+	popl	%esi
+	leave
+	ret
+
+L_more_than_95:
+	xorl	%eax,%eax
+	movl	%eax,(%esi)
+	movl	%eax,4(%esi)
+	movl	%eax,8(%esi)
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
new file mode 100644
index 00000000000..78d7b7689dd
--- /dev/null
+++ b/arch/i386/math-emu/status_w.h
@@ -0,0 +1,65 @@
+/*---------------------------------------------------------------------------+
+ |  status_w.h                                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _STATUS_H_
+#define _STATUS_H_
+
+#include "fpu_emu.h"    /* for definition of PECULIAR_486 */
+
+#ifdef __ASSEMBLY__
+#define	Const__(x)	$##x
+#else
+#define	Const__(x)	x
+#endif
+
+#define SW_Backward    	Const__(0x8000)	/* backward compatibility */
+#define SW_C3		Const__(0x4000)	/* condition bit 3 */
+#define SW_Top		Const__(0x3800)	/* top of stack */
+#define SW_Top_Shift 	Const__(11)	/* shift for top of stack bits */
+#define SW_C2		Const__(0x0400)	/* condition bit 2 */
+#define SW_C1		Const__(0x0200)	/* condition bit 1 */
+#define SW_C0		Const__(0x0100)	/* condition bit 0 */
+#define SW_Summary     	Const__(0x0080)	/* exception summary */
+#define SW_Stack_Fault	Const__(0x0040)	/* stack fault */
+#define SW_Precision   	Const__(0x0020)	/* loss of precision */
+#define SW_Underflow   	Const__(0x0010)	/* underflow */
+#define SW_Overflow    	Const__(0x0008)	/* overflow */
+#define SW_Zero_Div    	Const__(0x0004)	/* divide by zero */
+#define SW_Denorm_Op   	Const__(0x0002)	/* denormalized operand */
+#define SW_Invalid     	Const__(0x0001)	/* invalid operation */
+
+#define SW_Exc_Mask     Const__(0x27f)  /* Status word exception bit mask */
+
+#ifndef __ASSEMBLY__
+
+#define COMP_A_gt_B	1
+#define COMP_A_eq_B	2
+#define COMP_A_lt_B	3
+#define COMP_No_Comp	4
+#define COMP_Denormal   0x20
+#define COMP_NaN	0x40
+#define COMP_SNaN	0x80
+
+#define status_word() \
+  ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
+#define setcc(cc) ({ \
+  partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
+  partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })
+
+#ifdef PECULIAR_486
+   /* Default, this conveys no information, but an 80486 does it. */
+   /* Clear the SW_C1 bit, "other bits undefined". */
+#  define clear_C1()  { partial_status &= ~SW_C1; }
+# else
+#  define clear_C1()
+#endif /* PECULIAR_486 */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _STATUS_H_ */
diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h
new file mode 100644
index 00000000000..a0d73a1d2b6
--- /dev/null
+++ b/arch/i386/math-emu/version.h
@@ -0,0 +1,12 @@
+/*---------------------------------------------------------------------------+
+ |  version.h                                                                |
+ |                                                                           |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1996,1997,1999                               |
+ |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
+ |                  E-mail   billm@melbpc.org.au                             |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#define FPU_VERSION "wm-FPU-emu version 2.01"
diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S
new file mode 100644
index 00000000000..51842831798
--- /dev/null
+++ b/arch/i386/math-emu/wm_shrx.S
@@ -0,0 +1,204 @@
+	.file	"wm_shrx.S"
+/*---------------------------------------------------------------------------+
+ |  wm_shrx.S                                                                |
+ |                                                                           |
+ | 64 bit right shift functions                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1995                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
+ | and                                                                       |
+ |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_emu.h"
+
+.text
+/*---------------------------------------------------------------------------+
+ |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
+ |                                                                           |
+ |   Extended shift right function.                                          |
+ |   Fastest for small shifts.                                               |
+ |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
+ |   right by the number of bits specified by the second arg (arg2).         |
+ |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
+ |                [  64 bit arg ][ eax ]                                     |
+ |            shift right  --------->                                        |
+ |   The eax register is initialized to 0 before the shifting.               |
+ |   Results returned in the 64 bit arg and eax.                             |
+ +---------------------------------------------------------------------------*/
+
+ENTRY(FPU_shrx)
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	movl	PARAM2,%ecx
+	movl	PARAM1,%esi
+	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	pushl	%ebx
+	movl	(%esi),%ebx	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	xorl	%eax,%eax	/* extension */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	movl	%ebx,(%esi)
+	movl	%edx,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+L_more_than_31:
+	cmpl	$64,%ecx
+	jnc	L_more_than_63
+
+	subb	$32,%cl
+	movl	(%esi),%eax	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	shrd	%cl,%edx,%eax
+	shr	%cl,%edx
+	movl	%edx,(%esi)
+	movl	$0,4(%esi)
+	popl	%esi
+	leave
+	ret
+
+L_more_than_63:
+	cmpl	$96,%ecx
+	jnc	L_more_than_95
+
+	subb	$64,%cl
+	movl	4(%esi),%eax	/* msl */
+	shr	%cl,%eax
+	xorl	%edx,%edx
+	movl	%edx,(%esi)
+	movl	%edx,4(%esi)
+	popl	%esi
+	leave
+	ret
+
+L_more_than_95:
+	xorl	%eax,%eax
+	movl	%eax,(%esi)
+	movl	%eax,4(%esi)
+	popl	%esi
+	leave
+	ret
+
+
+/*---------------------------------------------------------------------------+
+ |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
+ |                                                                           |
+ |   Extended shift right function (optimized for small floating point       |
+ |   integers).                                                              |
+ |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
+ |   right by the number of bits specified by the second arg (arg2).         |
+ |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
+ |                [  64 bit arg ][ eax ]                                     |
+ |            shift right  --------->                                        |
+ |   The eax register is initialized to 0 before the shifting.               |
+ |   The lower 8 bits of eax are lost and replaced by a flag which is        |
+ |   set (to 0x01) if any bit, apart from the first one, is set in the       |
+ |   part which has been shifted out of the arg.                             |
+ |   Results returned in the 64 bit arg and eax.                             |
+ +---------------------------------------------------------------------------*/
+ENTRY(FPU_shrxs)
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%ebx
+	movl	PARAM2,%ecx
+	movl	PARAM1,%esi
+	cmpl	$64,%ecx	/* shrd only works for 0..31 bits */
+	jnc	Ls_more_than_63
+
+	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
+	jc	Ls_less_than_32
+
+/* We got here without jumps by assuming that the most common requirement
+   is for small integers */
+/* Shift by [32..63] bits */
+	subb	$32,%cl
+	movl	(%esi),%eax	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	xorl	%ebx,%ebx
+	shrd	%cl,%eax,%ebx
+	shrd	%cl,%edx,%eax
+	shr	%cl,%edx
+	orl	%ebx,%ebx		/* test these 32 bits */
+	setne	%bl
+	test	$0x7fffffff,%eax	/* and 31 bits here */
+	setne	%bh
+	orw	%bx,%bx			/* Any of the 63 bit set ? */
+	setne	%al
+	movl	%edx,(%esi)
+	movl	$0,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+/* Shift by [0..31] bits */
+Ls_less_than_32:
+	movl	(%esi),%ebx	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	xorl	%eax,%eax	/* extension */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	test	$0x7fffffff,%eax	/* only need to look at eax here */
+	setne	%al
+	movl	%ebx,(%esi)
+	movl	%edx,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+/* Shift by [64..95] bits */
+Ls_more_than_63:
+	cmpl	$96,%ecx
+	jnc	Ls_more_than_95
+
+	subb	$64,%cl
+	movl	(%esi),%ebx	/* lsl */
+	movl	4(%esi),%eax	/* msl */
+	xorl	%edx,%edx	/* extension */
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	orl	%ebx,%edx
+	setne	%bl
+	test	$0x7fffffff,%eax	/* only need to look at eax here */
+	setne	%bh
+	orw	%bx,%bx
+	setne	%al
+	xorl	%edx,%edx
+	movl	%edx,(%esi)	/* set to zero */
+	movl	%edx,4(%esi)	/* set to zero */
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+Ls_more_than_95:
+/* Shift by [96..inf) bits */
+	xorl	%eax,%eax
+	movl	(%esi),%ebx
+	orl	4(%esi),%ebx
+	setne	%al
+	xorl	%ebx,%ebx
+	movl	%ebx,(%esi)
+	movl	%ebx,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S
new file mode 100644
index 00000000000..d258f59564e
--- /dev/null
+++ b/arch/i386/math-emu/wm_sqrt.S
@@ -0,0 +1,470 @@
+	.file	"wm_sqrt.S"
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt.S                                                                |
+ |                                                                           |
+ | Fixed point arithmetic square root evaluation.                            |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1995,1997                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
+ |    returns the square root of n in n.                                     |
+ |                                                                           |
+ |  Use Newton's method to compute the square root of a number, which must   |
+ |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
+ |  Does not check the sign or tag of the argument.                          |
+ |  Sets the exponent, but not the sign or tag of the result.                |
+ |                                                                           |
+ |  The guess is kept in %esi:%edi                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_emu.h"
+
+
+#ifndef NON_REENTRANT_FPU
+/*	Local storage on the stack: */
+#define FPU_accum_3	-4(%ebp)	/* ms word */
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+
+/*
+ * The de-normalised argument:
+ *                  sq_2                  sq_1              sq_0
+ *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+ *           ^ binary point here
+ */
+#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
+#define FPU_fsqrt_arg_1	-24(%ebp)
+#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
+
+#else
+/*	Local storage in a static area: */
+.data
+	.align 4,0
+FPU_accum_3:
+	.long	0		/* ms word */
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+
+/* The de-normalised argument:
+                    sq_2                  sq_1              sq_0
+          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+             ^ binary point here
+ */
+FPU_fsqrt_arg_2:
+	.long	0		/* ms word */
+FPU_fsqrt_arg_1:
+	.long	0
+FPU_fsqrt_arg_0:
+	.long	0		/* ls word, at most the ms bit is set */
+#endif /* NON_REENTRANT_FPU */ 
+
+
+.text
+ENTRY(wm_sqrt)
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp
+#endif /* NON_REENTRANT_FPU */
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi
+
+	movl	SIGH(%esi),%eax
+	movl	SIGL(%esi),%ecx
+	xorl	%edx,%edx
+
+/* We use a rough linear estimate for the first guess.. */
+
+	cmpw	EXP_BIAS,EXP(%esi)
+	jnz	sqrt_arg_ge_2
+
+	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
+	rcrl	$1,%ecx
+	rcrl	$1,%edx
+
+sqrt_arg_ge_2:
+/* From here on, n is never accessed directly again until it is
+   replaced by the answer. */
+
+	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
+	movl	%ecx,FPU_fsqrt_arg_1
+	movl	%edx,FPU_fsqrt_arg_0
+
+/* Make a linear first estimate */
+	shrl	$1,%eax
+	addl	$0x40000000,%eax
+	movl	$0xaaaaaaaa,%ecx
+	mull	%ecx
+	shll	%edx			/* max result was 7fff... */
+	testl	$0x80000000,%edx	/* but min was 3fff... */
+	jnz	sqrt_prelim_no_adjust
+
+	movl	$0x80000000,%edx	/* round up */
+
+sqrt_prelim_no_adjust:
+	movl	%edx,%esi	/* Our first guess */
+
+/* We have now computed (approx)   (2 + x) / 3, which forms the basis
+   for a few iterations of Newton's method */
+
+	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
+
+/*
+ * From our initial estimate, three iterations are enough to get us
+ * to 30 bits or so. This will then allow two iterations at better
+ * precision to complete the process.
+ */
+
+/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
+	shrl	%ecx		/* Doing this first will prevent a divide */
+				/* overflow later. */
+
+	movl	%ecx,%edx	/* msw of the arg / 2 */
+	divl	%esi		/* current estimate */
+	shrl	%esi		/* divide by 2 */
+	addl	%eax,%esi	/* the new estimate */
+
+	movl	%ecx,%edx
+	divl	%esi
+	shrl	%esi
+	addl	%eax,%esi
+
+	movl	%ecx,%edx
+	divl	%esi
+	shrl	%esi
+	addl	%eax,%esi
+
+/*
+ * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
+ * we improve it to 60 bits or so.
+ *
+ * The strategy from now on is to compute new estimates from
+ *      guess := guess + (n - guess^2) / (2 * guess)
+ */
+
+/* First, find the square of the guess */
+	movl	%esi,%eax
+	mull	%esi
+/* guess^2 now in %edx:%eax */
+
+	movl	FPU_fsqrt_arg_1,%ecx
+	subl	%ecx,%eax
+	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
+	sbbl	%ecx,%edx
+	jnc	sqrt_stage_2_positive
+
+/* Subtraction gives a negative result,
+   negate the result before division. */
+	notl	%edx
+	notl	%eax
+	addl	$1,%eax
+	adcl	$0,%edx
+
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+	jmp	sqrt_stage_2_finish
+
+sqrt_stage_2_positive:
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	notl	%ecx
+	notl	%eax
+	addl	$1,%eax
+	adcl	$0,%ecx
+
+sqrt_stage_2_finish:
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* Form the new estimate in %esi:%edi */
+	movl	%eax,%edi
+	addl	%ecx,%esi
+
+	jnz	sqrt_stage_2_done	/* result should be [1..2) */
+
+#ifdef PARANOID
+/* It should be possible to get here only if the arg is ffff....ffff */
+	cmp	$0xffffffff,FPU_fsqrt_arg_1
+	jnz	sqrt_stage_2_error
+#endif /* PARANOID */
+
+/* The best rounded result. */
+	xorl	%eax,%eax
+	decl	%eax
+	movl	%eax,%edi
+	movl	%eax,%esi
+	movl	$0x7fffffff,%eax
+	jmp	sqrt_round_result
+
+#ifdef PARANOID
+sqrt_stage_2_error:
+	pushl	EX_INTERNAL|0x213
+	call	EXCEPTION
+#endif /* PARANOID */ 
+
+sqrt_stage_2_done:
+
+/* Now the square root has been computed to better than 60 bits. */
+
+/* Find the square of the guess. */
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,FPU_accum_1
+
+	movl	%esi,%eax
+	mull	%esi
+	movl	%edx,FPU_accum_3
+	movl	%eax,FPU_accum_2
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,FPU_accum_1
+	adcl	%edx,FPU_accum_2
+	adcl	$0,FPU_accum_3
+
+/*	movl	%esi,%eax */
+/*	mull	%edi */
+	addl	%eax,FPU_accum_1
+	adcl	%edx,FPU_accum_2
+	adcl	$0,FPU_accum_3
+
+/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
+
+	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
+	subl	%eax,FPU_accum_1
+	movl	FPU_fsqrt_arg_1,%eax
+	sbbl	%eax,FPU_accum_2
+	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
+	sbbl	%eax,FPU_accum_3
+	jnc	sqrt_stage_3_positive
+
+/* Subtraction gives a negative result,
+   negate the result before division */
+	notl	FPU_accum_1
+	notl	FPU_accum_2
+	notl	FPU_accum_3
+	addl	$1,FPU_accum_1
+	adcl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	adcl	$0,FPU_accum_3	/* This must be zero */
+	jz	sqrt_stage_3_no_error
+
+sqrt_stage_3_error:
+	pushl	EX_INTERNAL|0x207
+	call	EXCEPTION
+
+sqrt_stage_3_no_error:
+#endif /* PARANOID */
+
+	movl	FPU_accum_2,%edx
+	movl	FPU_accum_1,%eax
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* prepare to round the result */
+
+	addl	%ecx,%edi
+	adcl	$0,%esi
+
+	jmp	sqrt_stage_3_finished
+
+sqrt_stage_3_positive:
+	movl	FPU_accum_2,%edx
+	movl	FPU_accum_1,%eax
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* prepare to round the result */
+
+	notl	%eax		/* Negate the correction term */
+	notl	%ecx
+	addl	$1,%eax
+	adcl	$0,%ecx		/* carry here ==> correction == 0 */
+	adcl	$0xffffffff,%esi
+
+	addl	%ecx,%edi
+	adcl	$0,%esi
+
+sqrt_stage_3_finished:
+
+/*
+ * The result in %esi:%edi:%esi should be good to about 90 bits here,
+ * and the rounding information here does not have sufficient accuracy
+ * in a few rare cases.
+ */
+	cmpl	$0xffffffe0,%eax
+	ja	sqrt_near_exact_x
+
+	cmpl	$0x00000020,%eax
+	jb	sqrt_near_exact
+
+	cmpl	$0x7fffffe0,%eax
+	jb	sqrt_round_result
+
+	cmpl	$0x80000020,%eax
+	jb	sqrt_get_more_precision
+
+sqrt_round_result:
+/* Set up for rounding operations */
+	movl	%eax,%edx
+	movl	%esi,%eax
+	movl	%edi,%ebx
+	movl	PARAM1,%edi
+	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
+	jmp	fpu_reg_round
+
+
+sqrt_near_exact_x:
+/* First, the estimate must be rounded up. */
+	addl	$1,%edi
+	adcl	$0,%esi
+
+sqrt_near_exact:
+/*
+ * This is an easy case because x^1/2 is monotonic.
+ * We need just find the square of our estimate, compare it
+ * with the argument, and deduce whether our estimate is
+ * above, below, or exact. We use the fact that the estimate
+ * is known to be accurate to about 90 bits.
+ */
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,%ebx		/* 2nd ls word of square */
+	movl	%eax,%ecx		/* ls word of square */
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,%ebx
+	addl	%eax,%ebx
+
+#ifdef PARANOID
+	cmp	$0xffffffb0,%ebx
+	jb	sqrt_near_exact_ok
+
+	cmp	$0x00000050,%ebx
+	ja	sqrt_near_exact_ok
+
+	pushl	EX_INTERNAL|0x214
+	call	EXCEPTION
+
+sqrt_near_exact_ok:
+#endif /* PARANOID */ 
+
+	or	%ebx,%ebx
+	js	sqrt_near_exact_small
+
+	jnz	sqrt_near_exact_large
+
+	or	%ebx,%edx
+	jnz	sqrt_near_exact_large
+
+/* Our estimate is exactly the right answer */
+	xorl	%eax,%eax
+	jmp	sqrt_round_result
+
+sqrt_near_exact_small:
+/* Our estimate is too small */
+	movl	$0x000000ff,%eax
+	jmp	sqrt_round_result
+	
+sqrt_near_exact_large:
+/* Our estimate is too large, we need to decrement it */
+	subl	$1,%edi
+	sbbl	$0,%esi
+	movl	$0xffffff00,%eax
+	jmp	sqrt_round_result
+
+
+sqrt_get_more_precision:
+/* This case is almost the same as the above, except we start
+   with an extra bit of precision in the estimate. */
+	stc			/* The extra bit. */
+	rcll	$1,%edi		/* Shift the estimate left one bit */
+	rcll	$1,%esi
+
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,%ebx		/* 2nd ls word of square */
+	movl	%eax,%ecx		/* ls word of square */
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,%ebx
+	addl	%eax,%ebx
+
+/* Put our estimate back to its original value */
+	stc			/* The ms bit. */
+	rcrl	$1,%esi		/* Shift the estimate left one bit */
+	rcrl	$1,%edi
+
+#ifdef PARANOID
+	cmp	$0xffffff60,%ebx
+	jb	sqrt_more_prec_ok
+
+	cmp	$0x000000a0,%ebx
+	ja	sqrt_more_prec_ok
+
+	pushl	EX_INTERNAL|0x215
+	call	EXCEPTION
+
+sqrt_more_prec_ok:
+#endif /* PARANOID */ 
+
+	or	%ebx,%ebx
+	js	sqrt_more_prec_small
+
+	jnz	sqrt_more_prec_large
+
+	or	%ebx,%ecx
+	jnz	sqrt_more_prec_large
+
+/* Our estimate is exactly the right answer */
+	movl	$0x80000000,%eax
+	jmp	sqrt_round_result
+
+sqrt_more_prec_small:
+/* Our estimate is too small */
+	movl	$0x800000ff,%eax
+	jmp	sqrt_round_result
+	
+sqrt_more_prec_large:
+/* Our estimate is too large */
+	movl	$0x7fffff00,%eax
+	jmp	sqrt_round_result
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
new file mode 100644
index 00000000000..fc327250684
--- /dev/null
+++ b/arch/i386/mm/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the linux i386-specific parts of the memory manager.
+#
+
+obj-y	:= init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o
+
+obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
new file mode 100644
index 00000000000..523b30634e0
--- /dev/null
+++ b/arch/i386/mm/boot_ioremap.c
@@ -0,0 +1,97 @@
+/*
+ * arch/i386/mm/boot_ioremap.c
+ * 
+ * Re-map functions for early boot-time before paging_init() when the 
+ * boot-time pagetables are still in use
+ *
+ * Written by Dave Hansen <haveblue@us.ibm.com>
+ */
+
+
+/*
+ * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE
+ * keeps that from happenning.  If anyone has a better way, I'm listening.
+ *
+ * boot_pte_t is defined only if this all works correctly
+ */
+
+#include <linux/config.h>
+#undef CONFIG_X86_PAE
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <linux/init.h>
+#include <linux/stddef.h>
+
+/* 
+ * I'm cheating here.  It is known that the two boot PTE pages are 
+ * allocated next to each other.  I'm pretending that they're just
+ * one big array. 
+ */
+
+#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
+#define boot_pte_index(address) \
+	     (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
+
+static inline boot_pte_t* boot_vaddr_to_pte(void *address)
+{
+	boot_pte_t* boot_pg = (boot_pte_t*)pg0;
+	return &boot_pg[boot_pte_index((unsigned long)address)];
+}
+
+/*
+ * This is only for a caller who is clever enough to page-align
+ * phys_addr and virtual_source, and who also has a preference
+ * about which virtual address from which to steal ptes
+ */
+static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, 
+		    void* virtual_source)
+{
+	boot_pte_t* pte;
+	int i;
+	char *vaddr = virtual_source;
+
+	pte = boot_vaddr_to_pte(virtual_source);
+	for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
+		set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
+		__flush_tlb_one(&vaddr[i*PAGE_SIZE]);
+	}
+}
+
+/* the virtual space we're going to remap comes from this array */
+#define BOOT_IOREMAP_PAGES 4
+#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE)
+static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
+		       __attribute__ ((aligned (PAGE_SIZE)));
+
+/*
+ * This only applies to things which need to ioremap before paging_init()
+ * bt_ioremap() and plain ioremap() are both useless at this point.
+ * 
+ * When used, we're still using the boot-time pagetables, which only
+ * have 2 PTE pages mapping the first 8MB
+ *
+ * There is no unmap.  The boot-time PTE pages aren't used after boot.
+ * If you really want the space back, just remap it yourself.
+ * boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE)
+ */
+__init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
+{
+	unsigned long last_addr, offset;
+	unsigned int nrpages;
+	
+	last_addr = phys_addr + size - 1;
+
+	/* page align the requested address */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr) - phys_addr;
+	
+	nrpages = size >> PAGE_SHIFT;
+	if (nrpages > BOOT_IOREMAP_PAGES)
+		return NULL;
+	
+	__boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
+
+	return &boot_ioremap_space[offset];
+}
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
new file mode 100644
index 00000000000..1726b4096b1
--- /dev/null
+++ b/arch/i386/mm/discontig.c
@@ -0,0 +1,383 @@
+/*
+ * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation
+ * August 2002: added remote node KVA remap - Martin J. Bligh 
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/nodemask.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+#include <asm/mmzone.h>
+#include <bios_ebda.h>
+
+struct pglist_data *node_data[MAX_NUMNODES];
+bootmem_data_t node0_bdata;
+
+/*
+ * numa interface - we expect the numa architecture specfic code to have
+ *                  populated the following initialisation.
+ *
+ * 1) node_online_map  - the map of all nodes configured (online) in the system
+ * 2) physnode_map     - the mapping between a pfn and owning node
+ * 3) node_start_pfn   - the starting page frame number for a node
+ * 3) node_end_pfn     - the ending page fram number for a node
+ */
+
+/*
+ * physnode_map keeps track of the physical memory layout of a generic
+ * numa node on a 256Mb break (each element of the array will
+ * represent 256Mb of memory and will be marked by the node id.  so,
+ * if the first gig is on node 0, and the second gig is on node 1
+ * physnode_map will contain:
+ *
+ *     physnode_map[0-3] = 0;
+ *     physnode_map[4-7] = 1;
+ *     physnode_map[8- ] = -1;
+ */
+s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+	unsigned long pfn;
+
+	printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
+			nid, start, end);
+	printk(KERN_DEBUG "  Setting physnode_map array to node %d for pfns:\n", nid);
+	printk(KERN_DEBUG "  ");
+	for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
+		physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
+		printk("%ld ", pfn);
+	}
+	printk("\n");
+}
+
+unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
+					      unsigned long end_pfn)
+{
+	unsigned long nr_pages = end_pfn - start_pfn;
+
+	if (!nr_pages)
+		return 0;
+
+	return (nr_pages + 1) * sizeof(struct page);
+}
+
+unsigned long node_start_pfn[MAX_NUMNODES];
+unsigned long node_end_pfn[MAX_NUMNODES];
+
+extern unsigned long find_max_low_pfn(void);
+extern void find_max_pfn(void);
+extern void one_highpage_init(struct page *, int, int);
+
+extern struct e820map e820;
+extern unsigned long init_pg_tables_end;
+extern unsigned long highend_pfn, highstart_pfn;
+extern unsigned long max_low_pfn;
+extern unsigned long totalram_pages;
+extern unsigned long totalhigh_pages;
+
+#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
+
+unsigned long node_remap_start_pfn[MAX_NUMNODES];
+unsigned long node_remap_size[MAX_NUMNODES];
+unsigned long node_remap_offset[MAX_NUMNODES];
+void *node_remap_start_vaddr[MAX_NUMNODES];
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
+
+/*
+ * FLAT - support for basic PC memory model with discontig enabled, essentially
+ *        a single node with all available processors in it with a flat
+ *        memory map.
+ */
+int __init get_memcfg_numa_flat(void)
+{
+	printk("NUMA - single node, flat memory mode\n");
+
+	/* Run the memory configuration and find the top of memory. */
+	find_max_pfn();
+	node_start_pfn[0] = 0;
+	node_end_pfn[0] = max_pfn;
+	memory_present(0, 0, max_pfn);
+
+        /* Indicate there is one node available. */
+	nodes_clear(node_online_map);
+	node_set_online(0);
+	return 1;
+}
+
+/*
+ * Find the highest page frame number we have available for the node
+ */
+static void __init find_max_pfn_node(int nid)
+{
+	if (node_end_pfn[nid] > max_pfn)
+		node_end_pfn[nid] = max_pfn;
+	/*
+	 * if a user has given mem=XXXX, then we need to make sure 
+	 * that the node _starts_ before that, too, not just ends
+	 */
+	if (node_start_pfn[nid] > max_pfn)
+		node_start_pfn[nid] = max_pfn;
+	if (node_start_pfn[nid] > node_end_pfn[nid])
+		BUG();
+}
+
+/* 
+ * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
+ * method.  For node zero take this from the bottom of memory, for
+ * subsequent nodes place them at node_remap_start_vaddr which contains
+ * node local data in physically node local memory.  See setup_memory()
+ * for details.
+ */
+static void __init allocate_pgdat(int nid)
+{
+	if (nid && node_has_online_mem(nid))
+		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
+	else {
+		NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
+		min_low_pfn += PFN_UP(sizeof(pg_data_t));
+	}
+}
+
+void __init remap_numa_kva(void)
+{
+	void *vaddr;
+	unsigned long pfn;
+	int node;
+
+	for_each_online_node(node) {
+		if (node == 0)
+			continue;
+		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
+			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
+			set_pmd_pfn((ulong) vaddr, 
+				node_remap_start_pfn[node] + pfn, 
+				PAGE_KERNEL_LARGE);
+		}
+	}
+}
+
+static unsigned long calculate_numa_remap_pages(void)
+{
+	int nid;
+	unsigned long size, reserve_pages = 0;
+
+	for_each_online_node(nid) {
+		if (nid == 0)
+			continue;
+		if (!node_remap_size[nid])
+			continue;
+
+		/*
+		 * The acpi/srat node info can show hot-add memroy zones
+		 * where memory could be added but not currently present.
+		 */
+		if (node_start_pfn[nid] > max_pfn)
+			continue;
+		if (node_end_pfn[nid] > max_pfn)
+			node_end_pfn[nid] = max_pfn;
+
+		/* ensure the remap includes space for the pgdat. */
+		size = node_remap_size[nid] + sizeof(pg_data_t);
+
+		/* convert size to large (pmd size) pages, rounding up */
+		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
+		/* now the roundup is correct, convert to PAGE_SIZE pages */
+		size = size * PTRS_PER_PTE;
+		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
+				size, nid);
+		node_remap_size[nid] = size;
+		reserve_pages += size;
+		node_remap_offset[nid] = reserve_pages;
+		printk("Shrinking node %d from %ld pages to %ld pages\n",
+			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+		node_end_pfn[nid] -= size;
+		node_remap_start_pfn[nid] = node_end_pfn[nid];
+	}
+	printk("Reserving total of %ld pages for numa KVA remap\n",
+			reserve_pages);
+	return reserve_pages;
+}
+
+extern void setup_bootmem_allocator(void);
+unsigned long __init setup_memory(void)
+{
+	int nid;
+	unsigned long system_start_pfn, system_max_low_pfn;
+	unsigned long reserve_pages;
+
+	/*
+	 * When mapping a NUMA machine we allocate the node_mem_map arrays
+	 * from node local memory.  They are then mapped directly into KVA
+	 * between zone normal and vmalloc space.  Calculate the size of
+	 * this space and use it to adjust the boundry between ZONE_NORMAL
+	 * and ZONE_HIGHMEM.
+	 */
+	find_max_pfn();
+	get_memcfg_numa();
+
+	reserve_pages = calculate_numa_remap_pages();
+
+	/* partially used pages are not usable - thus round upwards */
+	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
+
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
+	printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
+			reserve_pages, max_low_pfn + reserve_pages);
+	printk("max_pfn = %ld\n", max_pfn);
+#ifdef CONFIG_HIGHMEM
+	highstart_pfn = highend_pfn = max_pfn;
+	if (max_pfn > system_max_low_pfn)
+		highstart_pfn = system_max_low_pfn;
+	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+	       pages_to_mb(highend_pfn - highstart_pfn));
+#endif
+	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+			pages_to_mb(system_max_low_pfn));
+	printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", 
+			min_low_pfn, max_low_pfn, highstart_pfn);
+
+	printk("Low memory ends at vaddr %08lx\n",
+			(ulong) pfn_to_kaddr(max_low_pfn));
+	for_each_online_node(nid) {
+		node_remap_start_vaddr[nid] = pfn_to_kaddr(
+			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
+		allocate_pgdat(nid);
+		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+			(ulong) node_remap_start_vaddr[nid],
+			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
+			    - node_remap_offset[nid] + node_remap_size[nid]));
+	}
+	printk("High memory starts at vaddr %08lx\n",
+			(ulong) pfn_to_kaddr(highstart_pfn));
+	vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
+	for_each_online_node(nid)
+		find_max_pfn_node(nid);
+
+	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
+	NODE_DATA(0)->bdata = &node0_bdata;
+	setup_bootmem_allocator();
+	return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+	int nid;
+
+	/*
+	 * Insert nodes into pgdat_list backward so they appear in order.
+	 * Clobber node 0's links and NULL out pgdat_list before starting.
+	 */
+	pgdat_list = NULL;
+	for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) {
+		if (!node_online(nid))
+			continue;
+		NODE_DATA(nid)->pgdat_next = pgdat_list;
+		pgdat_list = NODE_DATA(nid);
+	}
+
+	for_each_online_node(nid) {
+		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+		unsigned long *zholes_size;
+		unsigned int max_dma;
+
+		unsigned long low = max_low_pfn;
+		unsigned long start = node_start_pfn[nid];
+		unsigned long high = node_end_pfn[nid];
+
+		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+		if (node_has_online_mem(nid)){
+			if (start > low) {
+#ifdef CONFIG_HIGHMEM
+				BUG_ON(start > high);
+				zones_size[ZONE_HIGHMEM] = high - start;
+#endif
+			} else {
+				if (low < max_dma)
+					zones_size[ZONE_DMA] = low;
+				else {
+					BUG_ON(max_dma > low);
+					BUG_ON(low > high);
+					zones_size[ZONE_DMA] = max_dma;
+					zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+					zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+				}
+			}
+		}
+
+		zholes_size = get_zholes_size(nid);
+		/*
+		 * We let the lmem_map for node 0 be allocated from the
+		 * normal bootmem allocator, but other nodes come from the
+		 * remapped KVA area - mbligh
+		 */
+		if (!nid)
+			free_area_init_node(nid, NODE_DATA(nid),
+					zones_size, start, zholes_size);
+		else {
+			unsigned long lmem_map;
+			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
+			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
+			lmem_map &= PAGE_MASK;
+			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
+			free_area_init_node(nid, NODE_DATA(nid), zones_size,
+				start, zholes_size);
+		}
+	}
+	return;
+}
+
+void __init set_highmem_pages_init(int bad_ppro) 
+{
+#ifdef CONFIG_HIGHMEM
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned long node_pfn, node_high_size, zone_start_pfn;
+		struct page * zone_mem_map;
+		
+		if (!is_highmem(zone))
+			continue;
+
+		printk("Initializing %s for node %d\n", zone->name,
+			zone->zone_pgdat->node_id);
+
+		node_high_size = zone->spanned_pages;
+		zone_mem_map = zone->zone_mem_map;
+		zone_start_pfn = zone->zone_start_pfn;
+
+		for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) {
+			one_highpage_init((struct page *)(zone_mem_map + node_pfn),
+					  zone_start_pfn + node_pfn, bad_ppro);
+		}
+	}
+	totalram_pages += totalhigh_pages;
+#endif
+}
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
new file mode 100644
index 00000000000..f706449319c
--- /dev/null
+++ b/arch/i386/mm/extable.c
@@ -0,0 +1,36 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+#ifdef CONFIG_PNPBIOS
+	if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
+	{
+		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
+		extern u32 pnp_bios_is_utter_crap;
+		pnp_bios_is_utter_crap = 1;
+		printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
+		__asm__ volatile(
+			"movl %0, %%esp\n\t"
+			"jmp *%1\n\t"
+			: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
+		panic("do_trap: can't hit this");
+	}
+#endif
+
+	fixup = search_exception_tables(regs->eip);
+	if (fixup) {
+		regs->eip = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
new file mode 100644
index 00000000000..a509237c481
--- /dev/null
+++ b/arch/i386/mm/fault.c
@@ -0,0 +1,552 @@
+/*
+ *  linux/arch/i386/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+#include <linux/highmem.h>
+#include <linux/module.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+#include <asm/kdebug.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out 
+ */
+void bust_spinlocks(int yes)
+{
+	int loglevel_save = console_loglevel;
+
+	if (yes) {
+		oops_in_progress = 1;
+		return;
+	}
+#ifdef CONFIG_VT
+	unblank_screen();
+#endif
+	oops_in_progress = 0;
+	/*
+	 * OK, the message is on the console.  Now we call printk()
+	 * without oops_in_progress set so that printk will give klogd
+	 * a poke.  Hold onto your hats...
+	 */
+	console_loglevel = 15;		/* NMI oopser may have shut the console up */
+	printk(" ");
+	console_loglevel = loglevel_save;
+}
+
+/*
+ * Return EIP plus the CS segment base.  The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+ *
+ * The segment is checked, because it might have been changed by another
+ * task between the original faulting instruction and here.
+ *
+ * If CS is no longer a valid code segment, or if EIP is beyond the
+ * limit, or if it is a kernel address when CS is not a kernel segment,
+ * then the returned value will be greater than *eip_limit.
+ * 
+ * This is slow, but is very rarely executed.
+ */
+static inline unsigned long get_segment_eip(struct pt_regs *regs,
+					    unsigned long *eip_limit)
+{
+	unsigned long eip = regs->eip;
+	unsigned seg = regs->xcs & 0xffff;
+	u32 seg_ar, seg_limit, base, *desc;
+
+	/* The standard kernel/user address space limit. */
+	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
+
+	/* Unlikely, but must come before segment checks. */
+	if (unlikely((regs->eflags & VM_MASK) != 0))
+		return eip + (seg << 4);
+	
+	/* By far the most common cases. */
+	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
+		return eip;
+
+	/* Check the segment exists, is within the current LDT/GDT size,
+	   that kernel/user (ring 0..3) has the appropriate privilege,
+	   that it's a code segment, and get the limit. */
+	__asm__ ("larl %3,%0; lsll %3,%1"
+		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
+	if ((~seg_ar & 0x9800) || eip > seg_limit) {
+		*eip_limit = 0;
+		return 1;	 /* So that returned eip > *eip_limit. */
+	}
+
+	/* Get the GDT/LDT descriptor base. 
+	   When you look for races in this code remember that
+	   LDT and other horrors are only used in user space. */
+	if (seg & (1<<2)) {
+		/* Must lock the LDT while reading it. */
+		down(&current->mm->context.sem);
+		desc = current->mm->context.ldt;
+		desc = (void *)desc + (seg & ~7);
+	} else {
+		/* Must disable preemption while reading the GDT. */
+		desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu());
+		desc = (void *)desc + (seg & ~7);
+	}
+
+	/* Decode the code segment base from the descriptor */
+	base = get_desc_base((unsigned long *)desc);
+
+	if (seg & (1<<2)) { 
+		up(&current->mm->context.sem);
+	} else
+		put_cpu();
+
+	/* Adjust EIP and segment limit, and clamp at the kernel limit.
+	   It's legitimate for segments to wrap at 0xffffffff. */
+	seg_limit += base;
+	if (seg_limit < *eip_limit && seg_limit >= base)
+		*eip_limit = seg_limit;
+	return eip + base;
+}
+
+/* 
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ */
+static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
+{ 
+	unsigned long limit;
+	unsigned long instr = get_segment_eip (regs, &limit);
+	int scan_more = 1;
+	int prefetch = 0; 
+	int i;
+
+	for (i = 0; scan_more && i < 15; i++) { 
+		unsigned char opcode;
+		unsigned char instr_hi;
+		unsigned char instr_lo;
+
+		if (instr > limit)
+			break;
+		if (__get_user(opcode, (unsigned char *) instr))
+			break; 
+
+		instr_hi = opcode & 0xf0; 
+		instr_lo = opcode & 0x0f; 
+		instr++;
+
+		switch (instr_hi) { 
+		case 0x20:
+		case 0x30:
+			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
+			scan_more = ((instr_lo & 7) == 0x6);
+			break;
+			
+		case 0x60:
+			/* 0x64 thru 0x67 are valid prefixes in all modes. */
+			scan_more = (instr_lo & 0xC) == 0x4;
+			break;		
+		case 0xF0:
+			/* 0xF0, 0xF2, and 0xF3 are valid prefixes */
+			scan_more = !instr_lo || (instr_lo>>1) == 1;
+			break;			
+		case 0x00:
+			/* Prefetch instruction is 0x0F0D or 0x0F18 */
+			scan_more = 0;
+			if (instr > limit)
+				break;
+			if (__get_user(opcode, (unsigned char *) instr)) 
+				break;
+			prefetch = (instr_lo == 0xF) &&
+				(opcode == 0x0D || opcode == 0x18);
+			break;			
+		default:
+			scan_more = 0;
+			break;
+		} 
+	}
+	return prefetch;
+}
+
+static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
+			      unsigned long error_code)
+{
+	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		     boot_cpu_data.x86 >= 6)) {
+		/* Catch an obscure case of prefetch inside an NX page. */
+		if (nx_enabled && (error_code & 16))
+			return 0;
+		return __is_prefetch(regs, addr);
+	}
+	return 0;
+} 
+
+fastcall void do_invalid_op(struct pt_regs *, unsigned long);
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ *	bit 0 == 0 means no page found, 1 means protection fault
+ *	bit 1 == 0 means read, 1 means write
+ *	bit 2 == 0 means kernel, 1 means user-mode
+ */
+fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	unsigned long address;
+	unsigned long page;
+	int write;
+	siginfo_t info;
+
+	/* get the address */
+	__asm__("movl %%cr2,%0":"=r" (address));
+
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+	/* It's safe to allow irq's after cr2 has been saved */
+	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+		local_irq_enable();
+
+	tsk = current;
+
+	info.si_code = SEGV_MAPERR;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * This verifies that the fault happens in kernel space
+	 * (error_code & 4) == 0, and that the fault was not a
+	 * protection error (error_code & 1) == 0.
+	 */
+	if (unlikely(address >= TASK_SIZE)) { 
+		if (!(error_code & 5))
+			goto vmalloc_fault;
+		/* 
+		 * Don't take the mm semaphore here. If we fixup a prefetch
+		 * fault we could otherwise deadlock.
+		 */
+		goto bad_area_nosemaphore;
+	} 
+
+	mm = tsk->mm;
+
+	/*
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault..
+	 */
+	if (in_atomic() || !mm)
+		goto bad_area_nosemaphore;
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
+	 * erroneous fault occuring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibilty of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if ((error_code & 4) == 0 &&
+		    !search_exception_tables(regs->eip))
+			goto bad_area_nosemaphore;
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (error_code & 4) {
+		/*
+		 * accessing the stack below %esp is always a bug.
+		 * The "+ 32" is there due to some instructions (like
+		 * pusha) doing post-decrement on the stack and that
+		 * doesn't show up until later..
+		 */
+		if (address + 32 < regs->esp)
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	info.si_code = SEGV_ACCERR;
+	write = 0;
+	switch (error_code & 3) {
+		default:	/* 3: write, present */
+#ifdef TEST_VERIFY_AREA
+			if (regs->cs == KERNEL_CS)
+				printk("WP fault at %08lx\n", regs->eip);
+#endif
+			/* fall through */
+		case 2:		/* write, not present */
+			if (!(vma->vm_flags & VM_WRITE))
+				goto bad_area;
+			write++;
+			break;
+		case 1:		/* read, present */
+			goto bad_area;
+		case 0:		/* read, not present */
+			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+				goto bad_area;
+	}
+
+ survive:
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	switch (handle_mm_fault(mm, vma, address, write)) {
+		case VM_FAULT_MINOR:
+			tsk->min_flt++;
+			break;
+		case VM_FAULT_MAJOR:
+			tsk->maj_flt++;
+			break;
+		case VM_FAULT_SIGBUS:
+			goto do_sigbus;
+		case VM_FAULT_OOM:
+			goto out_of_memory;
+		default:
+			BUG();
+	}
+
+	/*
+	 * Did it hit the DOS screen memory VA from vm86 mode?
+	 */
+	if (regs->eflags & VM_MASK) {
+		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
+		if (bit < 32)
+			tsk->thread.screen_bitmap |= 1 << bit;
+	}
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (error_code & 4) {
+		/* 
+		 * Valid to do another page fault here because this one came 
+		 * from user space.
+		 */
+		if (is_prefetch(regs, address, error_code))
+			return;
+
+		tsk->thread.cr2 = address;
+		/* Kernel addresses are always protection faults */
+		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+		tsk->thread.trap_no = 14;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		/* info.si_code has been set above */
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+#ifdef CONFIG_X86_F00F_BUG
+	/*
+	 * Pentium F0 0F C7 C8 bug workaround.
+	 */
+	if (boot_cpu_data.f00f_bug) {
+		unsigned long nr;
+		
+		nr = (address - idt_descr.address) >> 3;
+
+		if (nr == 6) {
+			do_invalid_op(regs, 0);
+			return;
+		}
+	}
+#endif
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs))
+		return;
+
+	/* 
+	 * Valid to do another page fault here, because if this fault
+	 * had been triggered by is_prefetch fixup_exception would have 
+	 * handled it.
+	 */
+ 	if (is_prefetch(regs, address, error_code))
+ 		return;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+	bust_spinlocks(1);
+
+#ifdef CONFIG_X86_PAE
+	if (error_code & 16) {
+		pte_t *pte = lookup_address(address);
+
+		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+			printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+	}
+#endif
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n",address);
+	printk(KERN_ALERT " printing eip:\n");
+	printk("%08lx\n", regs->eip);
+	asm("movl %%cr3,%0":"=r" (page));
+	page = ((unsigned long *) __va(page))[address >> 22];
+	printk(KERN_ALERT "*pde = %08lx\n", page);
+	/*
+	 * We must not directly access the pte in the highpte
+	 * case, the page table might be allocated in highmem.
+	 * And lets rather not kmap-atomic the pte, just in case
+	 * it's allocated already.
+	 */
+#ifndef CONFIG_HIGHPTE
+	if (page & 1) {
+		page &= PAGE_MASK;
+		address &= 0x003ff000;
+		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+		printk(KERN_ALERT "*pte = %08lx\n", page);
+	}
+#endif
+	die("Oops", regs, error_code);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (tsk->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (error_code & 4)
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!(error_code & 4))
+		goto no_context;
+
+	/* User space => ok to do another page fault */
+	if (is_prefetch(regs, address, error_code))
+		return;
+
+	tsk->thread.cr2 = address;
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 14;
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void __user *)address;
+	force_sig_info(SIGBUS, &info, tsk);
+	return;
+
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int index = pgd_index(address);
+		unsigned long pgd_paddr;
+		pgd_t *pgd, *pgd_k;
+		pud_t *pud, *pud_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		asm("movl %%cr3,%0":"=r" (pgd_paddr));
+		pgd = index + (pgd_t *)__va(pgd_paddr);
+		pgd_k = init_mm.pgd + index;
+
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+
+		/*
+		 * set_pgd(pgd, *pgd_k); here would be useless on PAE
+		 * and redundant with the set_pmd() on non-PAE. As would
+		 * set_pud.
+		 */
+
+		pud = pud_offset(pgd, address);
+		pud_k = pud_offset(pgd_k, address);
+		if (!pud_present(*pud_k))
+			goto no_context;
+		
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
+		if (!pmd_present(*pmd_k))
+			goto no_context;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
+		return;
+	}
+}
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
new file mode 100644
index 00000000000..fc4c4cad4e9
--- /dev/null
+++ b/arch/i386/mm/highmem.c
@@ -0,0 +1,89 @@
+#include <linux/highmem.h>
+
+void *kmap(struct page *page)
+{
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+
+void kunmap(struct page *page)
+{
+	if (in_interrupt())
+		BUG();
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
+ * no global lock is needed and because the kmap code must perform a global TLB
+ * invalidation when the kmap pool wraps.
+ *
+ * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	inc_preempt_count();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	if (!pte_none(*(kmap_pte-idx)))
+		BUG();
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	if (vaddr < FIXADDR_START) { // FIXME
+		dec_preempt_count();
+		preempt_check_resched();
+		return;
+	}
+
+	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+		BUG();
+
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	pte_clear(&init_mm, vaddr, kmap_pte-idx);
+	__flush_tlb_one(vaddr);
+#endif
+
+	dec_preempt_count();
+	preempt_check_resched();
+}
+
+struct page *kmap_atomic_to_page(void *ptr)
+{
+	unsigned long idx, vaddr = (unsigned long)ptr;
+	pte_t *pte;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	idx = virt_to_fix(vaddr);
+	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
+	return pte_page(*pte);
+}
+
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
new file mode 100644
index 00000000000..a8c45143088
--- /dev/null
+++ b/arch/i386/mm/hugetlbpage.c
@@ -0,0 +1,431 @@
+/*
+ * IA-32 Huge TLB Page Support for Kernel.
+ *
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	pmd = pmd_alloc(mm, pud, addr);
+	return (pte_t *) pmd;
+}
+
+static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
+	return (pte_t *) pmd;
+}
+
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
+{
+	pte_t entry;
+
+	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
+	if (write_access) {
+		entry =
+		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+	} else
+		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+	entry = pte_mkyoung(entry);
+	mk_pte_huge(entry);
+	set_pte(page_table, entry);
+}
+
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+			struct vm_area_struct *vma)
+{
+	pte_t *src_pte, *dst_pte, entry;
+	struct page *ptepage;
+	unsigned long addr = vma->vm_start;
+	unsigned long end = vma->vm_end;
+
+	while (addr < end) {
+		dst_pte = huge_pte_alloc(dst, addr);
+		if (!dst_pte)
+			goto nomem;
+		src_pte = huge_pte_offset(src, addr);
+		entry = *src_pte;
+		ptepage = pte_page(entry);
+		get_page(ptepage);
+		set_pte(dst_pte, entry);
+		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+		addr += HPAGE_SIZE;
+	}
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
+int
+follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		    struct page **pages, struct vm_area_struct **vmas,
+		    unsigned long *position, int *length, int i)
+{
+	unsigned long vpfn, vaddr = *position;
+	int remainder = *length;
+
+	WARN_ON(!is_vm_hugetlb_page(vma));
+
+	vpfn = vaddr/PAGE_SIZE;
+	while (vaddr < vma->vm_end && remainder) {
+
+		if (pages) {
+			pte_t *pte;
+			struct page *page;
+
+			pte = huge_pte_offset(mm, vaddr);
+
+			/* hugetlb should be locked, and hence, prefaulted */
+			WARN_ON(!pte || pte_none(*pte));
+
+			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+			WARN_ON(!PageCompound(page));
+
+			get_page(page);
+			pages[i] = page;
+		}
+
+		if (vmas)
+			vmas[i] = vma;
+
+		vaddr += PAGE_SIZE;
+		++vpfn;
+		--remainder;
+		++i;
+	}
+
+	*length = remainder;
+	*position = vaddr;
+
+	return i;
+}
+
+#if 0	/* This is just for testing */
+struct page *
+follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+{
+	unsigned long start = address;
+	int length = 1;
+	int nr;
+	struct page *page;
+	struct vm_area_struct *vma;
+
+	vma = find_vma(mm, addr);
+	if (!vma || !is_vm_hugetlb_page(vma))
+		return ERR_PTR(-EINVAL);
+
+	pte = huge_pte_offset(mm, address);
+
+	/* hugetlb should be locked, and hence, prefaulted */
+	WARN_ON(!pte || pte_none(*pte));
+
+	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+	WARN_ON(!PageCompound(page));
+
+	return page;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+#else
+
+struct page *
+follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PSE);
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+	return page;
+}
+#endif
+
+void unmap_hugepage_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t pte, *ptep;
+	struct page *page;
+
+	BUG_ON(start & (HPAGE_SIZE - 1));
+	BUG_ON(end & (HPAGE_SIZE - 1));
+
+	for (address = start; address < end; address += HPAGE_SIZE) {
+		ptep = huge_pte_offset(mm, address);
+		if (!ptep)
+			continue;
+		pte = ptep_get_and_clear(mm, address, ptep);
+		if (pte_none(pte))
+			continue;
+		page = pte_page(pte);
+		put_page(page);
+	}
+	add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT));
+	flush_tlb_range(vma, start, end);
+}
+
+int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret = 0;
+
+	BUG_ON(vma->vm_start & ~HPAGE_MASK);
+	BUG_ON(vma->vm_end & ~HPAGE_MASK);
+
+	spin_lock(&mm->page_table_lock);
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+		unsigned long idx;
+		pte_t *pte = huge_pte_alloc(mm, addr);
+		struct page *page;
+
+		if (!pte) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		if (!pte_none(*pte)) {
+			pmd_t *pmd = (pmd_t *) pte;
+
+			page = pmd_page(*pmd);
+			pmd_clear(pmd);
+			mm->nr_ptes--;
+			dec_page_state(nr_page_table_pages);
+			page_cache_release(page);
+		}
+
+		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+		page = find_get_page(mapping, idx);
+		if (!page) {
+			/* charge the fs quota first */
+			if (hugetlb_get_quota(mapping)) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			page = alloc_huge_page();
+			if (!page) {
+				hugetlb_put_quota(mapping);
+				ret = -ENOMEM;
+				goto out;
+			}
+			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
+			if (! ret) {
+				unlock_page(page);
+			} else {
+				hugetlb_put_quota(mapping);
+				free_huge_page(page);
+				goto out;
+			}
+		}
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+	}
+out:
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
+/* x86_64 also uses this file */
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+		unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr;
+
+	start_addr = mm->free_area_cache;
+
+full_search:
+	addr = ALIGN(start_addr, HPAGE_SIZE);
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = TASK_UNMAPPED_BASE;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+	}
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+		unsigned long addr0, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev_vma;
+	unsigned long base = mm->mmap_base, addr = addr0;
+	int first_time = 1;
+
+	/* don't allow allocations above current base */
+	if (mm->free_area_cache > base)
+		mm->free_area_cache = base;
+
+try_again:
+	/* make sure it can fit in the remaining address space */
+	if (mm->free_area_cache < len)
+		goto fail;
+
+	/* either no address requested or cant fit in requested address hole */
+	addr = (mm->free_area_cache - len) & HPAGE_MASK;
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * i.e. return with success:
+		 */
+		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+			return addr;
+
+		/*
+		 * new region fits between prev_vma->vm_end and
+		 * vma->vm_start, use it:
+		 */
+		if (addr + len <= vma->vm_start &&
+				(!prev_vma || (addr >= prev_vma->vm_end)))
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+		else
+			/* pull free_area_cache down to the first hole */
+			if (mm->free_area_cache == vma->vm_end)
+				mm->free_area_cache = vma->vm_start;
+
+		/* try just below the current vma->vm_start */
+		addr = (vma->vm_start - len) & HPAGE_MASK;
+	} while (len <= vma->vm_start);
+
+fail:
+	/*
+	 * if hint left us with no space for the requested
+	 * mapping then try again:
+	 */
+	if (first_time) {
+		mm->free_area_cache = base;
+		first_time = 0;
+		goto try_again;
+	}
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
+			len, pgoff, flags);
+
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = base;
+
+	return addr;
+}
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		addr = ALIGN(addr, HPAGE_SIZE);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	if (mm->get_unmapped_area == arch_get_unmapped_area)
+		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+				pgoff, flags);
+	else
+		return hugetlb_get_unmapped_area_topdown(file, addr, len,
+				pgoff, flags);
+}
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
new file mode 100644
index 00000000000..7a7ea373726
--- /dev/null
+++ b/arch/i386/mm/init.c
@@ -0,0 +1,696 @@
+/*
+ *  linux/arch/i386/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+
+unsigned int __VMALLOC_RESERVE = 128 << 20;
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+unsigned long highstart_pfn, highend_pfn;
+
+static int noinline do_test_wp_bit(void);
+
+/*
+ * Creates a middle page table and puts a pointer to it in the
+ * given global directory entry. This only returns the gd entry
+ * in non-PAE compilation mode, since the middle layer is folded.
+ */
+static pmd_t * __init one_md_table_init(pgd_t *pgd)
+{
+	pud_t *pud;
+	pmd_t *pmd_table;
+		
+#ifdef CONFIG_X86_PAE
+	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+	pud = pud_offset(pgd, 0);
+	if (pmd_table != pmd_offset(pud, 0)) 
+		BUG();
+#else
+	pud = pud_offset(pgd, 0);
+	pmd_table = pmd_offset(pud, 0);
+#endif
+
+	return pmd_table;
+}
+
+/*
+ * Create a page table and place a pointer to it in a middle page
+ * directory entry.
+ */
+static pte_t * __init one_page_table_init(pmd_t *pmd)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+		if (page_table != pte_offset_kernel(pmd, 0))
+			BUG();	
+
+		return page_table;
+	}
+	
+	return pte_offset_kernel(pmd, 0);
+}
+
+/*
+ * This function initializes a certain range of kernel virtual memory 
+ * with new bootmem page tables, everywhere page tables are missing in
+ * the given range.
+ */
+
+/*
+ * NOTE: The pagetables are allocated contiguous on the physical space 
+ * so we can cache the place of the first one and move around without 
+ * checking the pgd every time.
+ */
+static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int pgd_idx, pmd_idx;
+	unsigned long vaddr;
+
+	vaddr = start;
+	pgd_idx = pgd_index(vaddr);
+	pmd_idx = pmd_index(vaddr);
+	pgd = pgd_base + pgd_idx;
+
+	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
+		if (pgd_none(*pgd)) 
+			one_md_table_init(pgd);
+		pud = pud_offset(pgd, vaddr);
+		pmd = pmd_offset(pud, vaddr);
+		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+			if (pmd_none(*pmd)) 
+				one_page_table_init(pmd);
+
+			vaddr += PMD_SIZE;
+		}
+		pmd_idx = 0;
+	}
+}
+
+static inline int is_kernel_text(unsigned long addr)
+{
+	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total 
+ * of max_low_pfn pages, by creating page tables starting from address 
+ * PAGE_OFFSET.
+ */
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+{
+	unsigned long pfn;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int pgd_idx, pmd_idx, pte_ofs;
+
+	pgd_idx = pgd_index(PAGE_OFFSET);
+	pgd = pgd_base + pgd_idx;
+	pfn = 0;
+
+	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+		pmd = one_md_table_init(pgd);
+		if (pfn >= max_low_pfn)
+			continue;
+		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+
+			/* Map with big pages if possible, otherwise create normal page tables. */
+			if (cpu_has_pse) {
+				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
+
+				if (is_kernel_text(address) || is_kernel_text(address2))
+					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+				else
+					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+				pfn += PTRS_PER_PTE;
+			} else {
+				pte = one_page_table_init(pmd);
+
+				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+						if (is_kernel_text(address))
+							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+						else
+							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+				}
+			}
+		}
+	}
+}
+
+static inline int page_kills_ppro(unsigned long pagenr)
+{
+	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
+		return 1;
+	return 0;
+}
+
+extern int is_available_memory(efi_memory_desc_t *);
+
+static inline int page_is_ram(unsigned long pagenr)
+{
+	int i;
+	unsigned long addr, end;
+
+	if (efi_enabled) {
+		efi_memory_desc_t *md;
+
+		for (i = 0; i < memmap.nr_map; i++) {
+			md = &memmap.map[i];
+			if (!is_available_memory(md))
+				continue;
+			addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+			end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
+
+			if ((pagenr >= addr) && (pagenr < end))
+				return 1;
+		}
+		return 0;
+	}
+
+	for (i = 0; i < e820.nr_map; i++) {
+
+		if (e820.map[i].type != E820_RAM)	/* not usable memory */
+			continue;
+		/*
+		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
+		 *	are not. Notably the 640->1Mb area. We need a sanity
+		 *	check here.
+		 */
+		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
+		if  ((pagenr >= addr) && (pagenr < end))
+			return 1;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr)					\
+	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
+
+static void __init kmap_init(void)
+{
+	unsigned long kmap_vstart;
+
+	/* cache the first kmap pte */
+	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+	kmap_prot = PAGE_KERNEL;
+}
+
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long vaddr;
+
+	vaddr = PKMAP_BASE;
+	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
+	pkmap_page_table = pte;	
+}
+
+void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+{
+	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+		ClearPageReserved(page);
+		set_bit(PG_highmem, &page->flags);
+		set_page_count(page, 1);
+		__free_page(page);
+		totalhigh_pages++;
+	} else
+		SetPageReserved(page);
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+static void __init set_highmem_pages_init(int bad_ppro)
+{
+	int pfn;
+	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
+		one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+	totalram_pages += totalhigh_pages;
+}
+#else
+extern void set_highmem_pages_init(int);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+#else
+#define kmap_init() do { } while (0)
+#define permanent_kmaps_init(pgd_base) do { } while (0)
+#define set_highmem_pages_init(bad_ppro) do { } while (0)
+#endif /* CONFIG_HIGHMEM */
+
+unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
+
+#ifndef CONFIG_DISCONTIGMEM
+#define remap_numa_kva() do {} while (0)
+#else
+extern void __init remap_numa_kva(void);
+#endif
+
+static void __init pagetable_init (void)
+{
+	unsigned long vaddr;
+	pgd_t *pgd_base = swapper_pg_dir;
+
+#ifdef CONFIG_X86_PAE
+	int i;
+	/* Init entries of the first-level page table to the zero page */
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+#endif
+
+	/* Enable PSE if available */
+	if (cpu_has_pse) {
+		set_in_cr4(X86_CR4_PSE);
+	}
+
+	/* Enable PGE if available */
+	if (cpu_has_pge) {
+		set_in_cr4(X86_CR4_PGE);
+		__PAGE_KERNEL |= _PAGE_GLOBAL;
+		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+	}
+
+	kernel_physical_mapping_init(pgd_base);
+	remap_numa_kva();
+
+	/*
+	 * Fixed mappings, only the page table structure has to be
+	 * created - mappings will be set by set_fixmap():
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	page_table_range_init(vaddr, 0, pgd_base);
+
+	permanent_kmaps_init(pgd_base);
+
+#ifdef CONFIG_X86_PAE
+	/*
+	 * Add low memory identity-mappings - SMP needs it when
+	 * starting up on an AP from real-mode. In the non-PAE
+	 * case we already have these mappings through head.S.
+	 * All user-space mappings are explicitly cleared after
+	 * SMP startup.
+	 */
+	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+#endif
+}
+
+#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
+/*
+ * Swap suspend & friends need this for resume because things like the intel-agp
+ * driver might have split up a kernel 4MB mapping.
+ */
+char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+	__attribute__ ((aligned (PAGE_SIZE)));
+
+static inline void save_pg_dir(void)
+{
+	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
+}
+#else
+static inline void save_pg_dir(void)
+{
+}
+#endif
+
+void zap_low_mappings (void)
+{
+	int i;
+
+	save_pg_dir();
+
+	/*
+	 * Zap initial low-memory mappings.
+	 *
+	 * Note that "pgd_clear()" doesn't do it for
+	 * us, because pgd_clear() is a no-op on i386.
+	 */
+	for (i = 0; i < USER_PTRS_PER_PGD; i++)
+#ifdef CONFIG_X86_PAE
+		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
+#else
+		set_pgd(swapper_pg_dir+i, __pgd(0));
+#endif
+	flush_tlb_all();
+}
+
+static int disable_nx __initdata = 0;
+u64 __supported_pte_mask = ~_PAGE_NX;
+
+/*
+ * noexec = on|off
+ *
+ * Control non executable mappings.
+ *
+ * on      Enable
+ * off     Disable
+ */
+void __init noexec_setup(const char *str)
+{
+	if (!strncmp(str, "on",2) && cpu_has_nx) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str,"off",3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+}
+
+int nx_enabled = 0;
+#ifdef CONFIG_X86_PAE
+
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+
+/*
+ * Enables/disables executability of a given kernel page and
+ * returns the previous setting.
+ */
+int __init set_kernel_exec(unsigned long vaddr, int enable)
+{
+	pte_t *pte;
+	int ret = 1;
+
+	if (!nx_enabled)
+		goto out;
+
+	pte = lookup_address(vaddr);
+	BUG_ON(!pte);
+
+	if (!pte_exec_kernel(*pte))
+		ret = 0;
+
+	if (enable)
+		pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+	else
+		pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
+	__flush_tlb_all();
+out:
+	return ret;
+}
+
+#endif
+
+/*
+ * paging_init() sets up the page tables - note that the first 8MB are
+ * already mapped by head.S.
+ *
+ * This routines also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+#ifdef CONFIG_X86_PAE
+	set_nx();
+	if (nx_enabled)
+		printk("NX (Execute Disable) protection: active\n");
+#endif
+
+	pagetable_init();
+
+	load_cr3(swapper_pg_dir);
+
+#ifdef CONFIG_X86_PAE
+	/*
+	 * We will bail out later - printk doesn't work right now so
+	 * the user would just see a hanging kernel.
+	 */
+	if (cpu_has_pae)
+		set_in_cr4(X86_CR4_PAE);
+#endif
+	__flush_tlb_all();
+
+	kmap_init();
+}
+
+/*
+ * Test if the WP bit works in supervisor mode. It isn't supported on 386's
+ * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
+ * used to involve black magic jumps to work around some nasty CPU bugs,
+ * but fortunately the switch to using exceptions got rid of all that.
+ */
+
+static void __init test_wp_bit(void)
+{
+	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
+
+	/* Any page-aligned address will do, the test is non-destructive */
+	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
+	boot_cpu_data.wp_works_ok = do_test_wp_bit();
+	clear_fixmap(FIX_WP_TEST);
+
+	if (!boot_cpu_data.wp_works_ok) {
+		printk("No.\n");
+#ifdef CONFIG_X86_WP_WORKS_OK
+		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
+#endif
+	} else {
+		printk("Ok.\n");
+	}
+}
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	num_physpages = highend_pfn;
+#else
+	num_physpages = max_low_pfn;
+#endif
+#ifndef CONFIG_DISCONTIGMEM
+	max_mapnr = num_physpages;
+#endif
+}
+
+static struct kcore_list kcore_mem, kcore_vmalloc; 
+
+void __init mem_init(void)
+{
+	extern int ppro_with_ram_bug(void);
+	int codesize, reservedpages, datasize, initsize;
+	int tmp;
+	int bad_ppro;
+
+#ifndef CONFIG_DISCONTIGMEM
+	if (!mem_map)
+		BUG();
+#endif
+	
+	bad_ppro = ppro_with_ram_bug();
+
+#ifdef CONFIG_HIGHMEM
+	/* check that fixmap and pkmap do not overlap */
+	if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+		printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+		printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+				PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+		BUG();
+	}
+#endif
+ 
+	set_max_mapnr_init();
+
+#ifdef CONFIG_HIGHMEM
+	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+#endif
+
+	/* this will put all low memory onto the freelists */
+	totalram_pages += free_all_bootmem();
+
+	reservedpages = 0;
+	for (tmp = 0; tmp < max_low_pfn; tmp++)
+		/*
+		 * Only count reserved RAM pages
+		 */
+		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
+			reservedpages++;
+
+	set_highmem_pages_init(bad_ppro);
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 
+		   VMALLOC_END-VMALLOC_START);
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		initsize >> 10,
+		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+	       );
+
+#ifdef CONFIG_X86_PAE
+	if (!cpu_has_pae)
+		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
+#endif
+	if (boot_cpu_data.wp_works_ok < 0)
+		test_wp_bit();
+
+	/*
+	 * Subtle. SMP is doing it's boot stuff late (because it has to
+	 * fork idle threads) - but it also needs low mappings for the
+	 * protected-mode entry to work. We zap these entries only after
+	 * the WP-bit has been tested.
+	 */
+#ifndef CONFIG_SMP
+	zap_low_mappings();
+#endif
+}
+
+kmem_cache_t *pgd_cache;
+kmem_cache_t *pmd_cache;
+
+void __init pgtable_cache_init(void)
+{
+	if (PTRS_PER_PMD > 1) {
+		pmd_cache = kmem_cache_create("pmd",
+					PTRS_PER_PMD*sizeof(pmd_t),
+					PTRS_PER_PMD*sizeof(pmd_t),
+					0,
+					pmd_ctor,
+					NULL);
+		if (!pmd_cache)
+			panic("pgtable_cache_init(): cannot create pmd cache");
+	}
+	pgd_cache = kmem_cache_create("pgd",
+				PTRS_PER_PGD*sizeof(pgd_t),
+				PTRS_PER_PGD*sizeof(pgd_t),
+				0,
+				pgd_ctor,
+				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+	if (!pgd_cache)
+		panic("pgtable_cache_init(): Cannot create pgd cache");
+}
+
+/*
+ * This function cannot be __init, since exceptions don't work in that
+ * section.  Put this after the callers, so that it cannot be inlined.
+ */
+static int noinline do_test_wp_bit(void)
+{
+	char tmp_reg;
+	int flag;
+
+	__asm__ __volatile__(
+		"	movb %0,%1	\n"
+		"1:	movb %1,%0	\n"
+		"	xorl %2,%2	\n"
+		"2:			\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4	\n"
+		"	.long 1b,2b	\n"
+		".previous		\n"
+		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
+		 "=q" (tmp_reg),
+		 "=r" (flag)
+		:"2" (1)
+		:"memory");
+	
+	return flag;
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)(&__init_begin);
+	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
new file mode 100644
index 00000000000..db06f739991
--- /dev/null
+++ b/arch/i386/mm/ioremap.c
@@ -0,0 +1,320 @@
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#define ISA_START_ADDRESS	0xa0000
+#define ISA_END_ADDRESS		0x100000
+
+static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, unsigned long flags)
+{
+	pte_t *pte;
+	unsigned long pfn;
+
+	pfn = phys_addr >> PAGE_SHIFT;
+	pte = pte_alloc_kernel(&init_mm, pmd, addr);
+	if (!pte)
+		return -ENOMEM;
+	do {
+		BUG_ON(!pte_none(*pte));
+		set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
+					_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, unsigned long flags)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	phys_addr -= addr;
+	pmd = pmd_alloc(&init_mm, pud, addr);
+	if (!pmd)
+		return -ENOMEM;
+	do {
+		next = pmd_addr_end(addr, end);
+		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, unsigned long phys_addr, unsigned long flags)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	phys_addr -= addr;
+	pud = pud_alloc(&init_mm, pgd, addr);
+	if (!pud)
+		return -ENOMEM;
+	do {
+		next = pud_addr_end(addr, end);
+		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+static int ioremap_page_range(unsigned long addr,
+		unsigned long end, unsigned long phys_addr, unsigned long flags)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	int err;
+
+	BUG_ON(addr >= end);
+	flush_cache_all();
+	phys_addr -= addr;
+	pgd = pgd_offset_k(addr);
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		next = pgd_addr_end(addr, end);
+		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+	spin_unlock(&init_mm.page_table_lock);
+	flush_tlb_all();
+	return err;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+	void __iomem * addr;
+	struct vm_struct * area;
+	unsigned long offset, last_addr;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Don't remap the low PCI/ISA area, it's always mapped..
+	 */
+	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+		return (void __iomem *) phys_to_virt(phys_addr);
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using..
+	 */
+	if (phys_addr <= virt_to_phys(high_memory - 1)) {
+		char *t_addr, *t_end;
+		struct page *page;
+
+		t_addr = __va(phys_addr);
+		t_end = t_addr + (size - 1);
+	   
+		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+			if(!PageReserved(page))
+				return NULL;
+	}
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP | (flags << 20));
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = (void __iomem *) area->addr;
+	if (ioremap_page_range((unsigned long) addr,
+			(unsigned long) addr + size, phys_addr, flags)) {
+		vunmap((void __force *) addr);
+		return NULL;
+	}
+	return (void __iomem *) (offset + (char __iomem *)addr);
+}
+
+
+/**
+ * ioremap_nocache     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address. 
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many 
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ * 
+ * Must be freed with iounmap.
+ */
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+	unsigned long last_addr;
+	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
+	if (!p) 
+		return p; 
+
+	/* Guaranteed to be > phys_addr, as per __ioremap() */
+	last_addr = phys_addr + size - 1;
+
+	if (last_addr < virt_to_phys(high_memory) - 1) {
+		struct page *ppage = virt_to_page(__va(phys_addr));		
+		unsigned long npages;
+
+		phys_addr &= PAGE_MASK;
+
+		/* This might overflow and become zero.. */
+		last_addr = PAGE_ALIGN(last_addr);
+
+		/* .. but that's ok, because modulo-2**n arithmetic will make
+	 	* the page-aligned "last - first" come out right.
+	 	*/
+		npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
+			iounmap(p); 
+			p = NULL;
+		}
+		global_flush_tlb();
+	}
+
+	return p;					
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	struct vm_struct *p;
+	if ((void __force *) addr <= high_memory) 
+		return;
+
+	/*
+	 * __ioremap special-cases the PCI/ISA range by not instantiating a
+	 * vm_area and by simply returning an address into the kernel mapping
+	 * of ISA space.   So handle that here.
+	 */
+	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
+			addr < phys_to_virt(ISA_END_ADDRESS))
+		return;
+
+	p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+	if (!p) { 
+		printk("__iounmap: bad address %p\n", addr);
+		return;
+	}
+
+	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
+		/* p->size includes the guard page, but cpa doesn't like that */
+		change_page_attr(virt_to_page(__va(p->phys_addr)),
+				 p->size >> PAGE_SHIFT,
+				 PAGE_KERNEL);
+		global_flush_tlb();
+	} 
+	kfree(p); 
+}
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+	unsigned long offset, last_addr;
+	unsigned int nrpages;
+	enum fixed_addresses idx;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Don't remap the low PCI/ISA area, it's always mapped..
+	 */
+	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+		return phys_to_virt(phys_addr);
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr) - phys_addr;
+
+	/*
+	 * Mappings have to fit in the FIX_BTMAP area.
+	 */
+	nrpages = size >> PAGE_SHIFT;
+	if (nrpages > NR_FIX_BTMAPS)
+		return NULL;
+
+	/*
+	 * Ok, go for it..
+	 */
+	idx = FIX_BTMAP_BEGIN;
+	while (nrpages > 0) {
+		set_fixmap(idx, phys_addr);
+		phys_addr += PAGE_SIZE;
+		--idx;
+		--nrpages;
+	}
+	return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+	unsigned long virt_addr;
+	unsigned long offset;
+	unsigned int nrpages;
+	enum fixed_addresses idx;
+
+	virt_addr = (unsigned long)addr;
+	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
+		return;
+	offset = virt_addr & ~PAGE_MASK;
+	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+	idx = FIX_BTMAP_BEGIN;
+	while (nrpages > 0) {
+		clear_fixmap(idx);
+		--idx;
+		--nrpages;
+	}
+}
diff --git a/arch/i386/mm/mmap.c b/arch/i386/mm/mmap.c
new file mode 100644
index 00000000000..e4730a1a43d
--- /dev/null
+++ b/arch/i386/mm/mmap.c
@@ -0,0 +1,76 @@
+/*
+ *  linux/arch/i386/mm/mmap.c
+ *
+ *  flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
+ */
+
+#include <linux/personality.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+static inline unsigned long mmap_base(struct mm_struct *mm)
+{
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long random_factor = 0;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = get_random_int() % (1024*1024);
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	if (sysctl_legacy_va_layout ||
+			(current->personality & ADDR_COMPAT_LAYOUT) ||
+			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+		mm->unmap_area = arch_unmap_area;
+	} else {
+		mm->mmap_base = mmap_base(mm);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
+	}
+}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
new file mode 100644
index 00000000000..cb3da6baa70
--- /dev/null
+++ b/arch/i386/mm/pageattr.c
@@ -0,0 +1,221 @@
+/* 
+ * Copyright 2002 Andi Kleen, SuSE Labs. 
+ * Thanks to Ben LaHaise for precious feedback.
+ */ 
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+
+static DEFINE_SPINLOCK(cpa_lock);
+static struct list_head df_list = LIST_HEAD_INIT(df_list);
+
+
+pte_t *lookup_address(unsigned long address) 
+{ 
+	pgd_t *pgd = pgd_offset_k(address);
+	pud_t *pud;
+	pmd_t *pmd;
+	if (pgd_none(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		return NULL;
+	if (pmd_large(*pmd))
+		return (pte_t *)pmd;
+        return pte_offset_kernel(pmd, address);
+} 
+
+static struct page *split_large_page(unsigned long address, pgprot_t prot)
+{ 
+	int i; 
+	unsigned long addr;
+	struct page *base;
+	pte_t *pbase;
+
+	spin_unlock_irq(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	spin_lock_irq(&cpa_lock);
+	if (!base) 
+		return NULL;
+
+	address = __pa(address);
+	addr = address & LARGE_PAGE_MASK; 
+	pbase = (pte_t *)page_address(base);
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
+		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
+				   addr == address ? prot : PAGE_KERNEL);
+	}
+	return base;
+} 
+
+static void flush_kernel_map(void *dummy) 
+{ 
+	/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
+	if (boot_cpu_data.x86_model >= 4) 
+		asm volatile("wbinvd":::"memory"); 
+	/* Flush all to work around Errata in early athlons regarding 
+	 * large page flushing. 
+	 */
+	__flush_tlb_all(); 	
+}
+
+static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
+{ 
+	struct page *page;
+	unsigned long flags;
+
+	set_pte_atomic(kpte, pte); 	/* change init_mm */
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	for (page = pgd_list; page; page = (struct page *)page->index) {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		pud = pud_offset(pgd, address);
+		pmd = pmd_offset(pud, address);
+		set_pte_atomic((pte_t *)pmd, pte);
+	}
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+/* 
+ * No more special protections in this 2/4MB area - revert to a
+ * large page again. 
+ */
+static inline void revert_page(struct page *kpte_page, unsigned long address)
+{
+	pte_t *linear = (pte_t *) 
+		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
+	set_pmd_pte(linear,  address,
+		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
+			    PAGE_KERNEL_LARGE));
+}
+
+static int
+__change_page_attr(struct page *page, pgprot_t prot)
+{ 
+	pte_t *kpte; 
+	unsigned long address;
+	struct page *kpte_page;
+
+	BUG_ON(PageHighMem(page));
+	address = (unsigned long)page_address(page);
+
+	kpte = lookup_address(address);
+	if (!kpte)
+		return -EINVAL;
+	kpte_page = virt_to_page(kpte);
+	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
+		if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
+			set_pte_atomic(kpte, mk_pte(page, prot)); 
+		} else {
+			struct page *split = split_large_page(address, prot); 
+			if (!split)
+				return -ENOMEM;
+			set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
+			kpte_page = split;
+		}	
+		get_page(kpte_page);
+	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
+		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
+		__put_page(kpte_page);
+	} else
+		BUG();
+
+	/*
+	 * If the pte was reserved, it means it was created at boot
+	 * time (not via split_large_page) and in turn we must not
+	 * replace it with a largepage.
+	 */
+	if (!PageReserved(kpte_page)) {
+		/* memleak and potential failed 2M page regeneration */
+		BUG_ON(!page_count(kpte_page));
+
+		if (cpu_has_pse && (page_count(kpte_page) == 1)) {
+			list_add(&kpte_page->lru, &df_list);
+			revert_page(kpte_page, address);
+		}
+	}
+	return 0;
+} 
+
+static inline void flush_map(void)
+{
+	on_each_cpu(flush_kernel_map, NULL, 1, 1);
+}
+
+/*
+ * Change the page attributes of an page in the linear mapping.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ * 
+ * The caller needs to ensure that there are no conflicting mappings elsewhere.
+ * This function only deals with the kernel linear map.
+ * 
+ * Caller must call global_flush_tlb() after this.
+ */
+int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+	int err = 0; 
+	int i; 
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpa_lock, flags);
+	for (i = 0; i < numpages; i++, page++) { 
+		err = __change_page_attr(page, prot);
+		if (err) 
+			break; 
+	} 	
+	spin_unlock_irqrestore(&cpa_lock, flags);
+	return err;
+}
+
+void global_flush_tlb(void)
+{ 
+	LIST_HEAD(l);
+	struct page *pg, *next;
+
+	BUG_ON(irqs_disabled());
+
+	spin_lock_irq(&cpa_lock);
+	list_splice_init(&df_list, &l);
+	spin_unlock_irq(&cpa_lock);
+	flush_map();
+	list_for_each_entry_safe(pg, next, &l, lru)
+		__free_page(pg);
+} 
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	/* the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	/* we should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 */
+	__flush_tlb_all();
+}
+#endif
+
+EXPORT_SYMBOL(change_page_attr);
+EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
new file mode 100644
index 00000000000..0742d54f8bb
--- /dev/null
+++ b/arch/i386/mm/pgtable.c
@@ -0,0 +1,260 @@
+/*
+ *  linux/arch/i386/mm/pgtable.c
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+void show_mem(void)
+{
+	int total = 0, reserved = 0;
+	int shared = 0, cached = 0;
+	int highmem = 0;
+	struct page *page;
+	pg_data_t *pgdat;
+	unsigned long i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+			page = pgdat->node_mem_map + i;
+			total++;
+			if (PageHighMem(page))
+				highmem++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%d pages of RAM\n", total);
+	printk("%d pages of HIGHMEM\n",highmem);
+	printk("%d reserved pages\n",reserved);
+	printk("%d pages shared\n",shared);
+	printk("%d pages swap cached\n",cached);
+}
+
+/*
+ * Associate a virtual page frame with a given physical page frame 
+ * and protection flags for that frame.
+ */ 
+static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		BUG();
+		return;
+	}
+	pud = pud_offset(pgd, vaddr);
+	if (pud_none(*pud)) {
+		BUG();
+		return;
+	}
+	pmd = pmd_offset(pud, vaddr);
+	if (pmd_none(*pmd)) {
+		BUG();
+		return;
+	}
+	pte = pte_offset_kernel(pmd, vaddr);
+	/* <pfn,flags> stored as-is, to permit clearing entries */
+	set_pte(pte, pfn_pte(pfn, flags));
+
+	/*
+	 * It's enough to flush this one mapping.
+	 * (PGE mappings get flushed as well)
+	 */
+	__flush_tlb_one(vaddr);
+}
+
+/*
+ * Associate a large virtual page frame with a given physical page frame 
+ * and protection flags for that frame. pfn is for the base of the page,
+ * vaddr is what the page gets mapped to - both must be properly aligned. 
+ * The pmd must already be instantiated. Assumes PAE mode.
+ */ 
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
+		printk ("set_pmd_pfn: vaddr misaligned\n");
+		return; /* BUG(); */
+	}
+	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
+		printk ("set_pmd_pfn: pfn misaligned\n");
+		return; /* BUG(); */
+	}
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		printk ("set_pmd_pfn: pgd_none\n");
+		return; /* BUG(); */
+	}
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	set_pmd(pmd, pfn_pmd(pfn, flags));
+	/*
+	 * It's enough to flush this one mapping.
+	 * (PGE mappings get flushed as well)
+	 */
+	__flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+#else
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
+	return pte;
+}
+
+void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * The locking scheme was chosen on the basis of manfred's
+ * recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	page->index = (unsigned long)pgd_list;
+	if (pgd_list)
+		pgd_list->private = (unsigned long)&page->index;
+	pgd_list = page;
+	page->private = (unsigned long)&pgd_list;
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *next, **pprev, *page = virt_to_page(pgd);
+	next = (struct page *)page->index;
+	pprev = (struct page **)page->private;
+	*pprev = next;
+	if (next)
+		next->private = (unsigned long)pprev;
+}
+
+void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+	unsigned long flags;
+
+	if (PTRS_PER_PMD == 1)
+		spin_lock_irqsave(&pgd_lock, flags);
+
+	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+}
+
+/* never called when PTRS_PER_PMD > 1 */
+void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	int i;
+	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+
+	if (PTRS_PER_PMD == 1 || !pgd)
+		return pgd;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+		if (!pmd)
+			goto out_oom;
+		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+	}
+	return pgd;
+
+out_oom:
+	for (i--; i >= 0; i--)
+		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pgd_cache, pgd);
+	return NULL;
+}
+
+void pgd_free(pgd_t *pgd)
+{
+	int i;
+
+	/* in the PAE case user pgd entries are overwritten before usage */
+	if (PTRS_PER_PMD > 1)
+		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	/* in the non-PAE case, clear_page_range() clears user pgd entries */
+	kmem_cache_free(pgd_cache, pgd);
+}
diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig
new file mode 100644
index 00000000000..5ade19801b9
--- /dev/null
+++ b/arch/i386/oprofile/Kconfig
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+	  
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff --git a/arch/i386/oprofile/Makefile b/arch/i386/oprofile/Makefile
new file mode 100644
index 00000000000..30f3eb36666
--- /dev/null
+++ b/arch/i386/oprofile/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o  \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_athlon.o \
+					   op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
new file mode 100644
index 00000000000..52d72e074f7
--- /dev/null
+++ b/arch/i386/oprofile/backtrace.c
@@ -0,0 +1,111 @@
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author David Smith
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+
+struct frame_head {
+	struct frame_head * ebp;
+	unsigned long ret;
+} __attribute__((packed));
+
+static struct frame_head *
+dump_backtrace(struct frame_head * head)
+{
+	oprofile_add_trace(head->ret);
+
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (head >= head->ebp)
+		return NULL;
+
+	return head->ebp;
+}
+
+/* check that the page(s) containing the frame head are present */
+static int pages_present(struct frame_head * head)
+{
+	struct mm_struct * mm = current->mm;
+
+	/* FIXME: only necessary once per page */
+	if (!check_user_page_readable(mm, (unsigned long)head))
+		return 0;
+
+	return check_user_page_readable(mm, (unsigned long)(head + 1));
+}
+
+/*
+ * |             | /\ Higher addresses
+ * |             |
+ * --------------- stack base (address of current_thread_info)
+ * | thread info |
+ * .             .
+ * |    stack    |
+ * --------------- saved regs->ebp value if valid (frame_head address)
+ * .             .
+ * --------------- struct pt_regs stored on stack (struct pt_regs *)
+ * |             |
+ * .             .
+ * |             |
+ * --------------- %esp
+ * |             |
+ * |             | \/ Lower addresses
+ *
+ * Thus, &pt_regs <-> stack base restricts the valid(ish) ebp values
+ */
+#ifdef CONFIG_FRAME_POINTER
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+	unsigned long headaddr = (unsigned long)head;
+	unsigned long stack = (unsigned long)regs;
+	unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
+
+	return headaddr > stack && headaddr < stack_base;
+}
+#else
+/* without fp, it's just junk */
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+	return 0;
+}
+#endif
+
+
+void
+x86_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_head *head;
+
+#ifdef CONFIG_X86_64
+	head = (struct frame_head *)regs->rbp;
+#else
+	head = (struct frame_head *)regs->ebp;
+#endif
+
+	if (!user_mode(regs)) {
+		while (depth-- && valid_kernel_stack(head, regs))
+			head = dump_backtrace(head);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	if (!spin_trylock(&current->mm->page_table_lock))
+		return;
+#endif
+
+	while (depth-- && head && pages_present(head))
+		head = dump_backtrace(head);
+
+#ifdef CONFIG_SMP
+	spin_unlock(&current->mm->page_table_lock);
+#endif
+}
diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c
new file mode 100644
index 00000000000..c90332de582
--- /dev/null
+++ b/arch/i386/oprofile/init.c
@@ -0,0 +1,48 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+ 
+/* We support CPUs that have performance counters like the Pentium Pro
+ * with the NMI mode driver.
+ */
+ 
+extern int nmi_init(struct oprofile_operations * ops);
+extern int nmi_timer_init(struct oprofile_operations * ops);
+extern void nmi_exit(void);
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+	int ret;
+
+	ret = -ENODEV;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	ret = nmi_init(ops);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+	if (ret < 0)
+		ret = nmi_timer_init(ops);
+#endif
+	ops->backtrace = x86_backtrace;
+
+	return ret;
+}
+
+
+void oprofile_arch_exit(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	nmi_exit();
+#endif
+}
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
new file mode 100644
index 00000000000..3492d961d3f
--- /dev/null
+++ b/arch/i386/oprofile/nmi_int.c
@@ -0,0 +1,427 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+ 
+static int nmi_start(void);
+static void nmi_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int nmi_enabled = 0;
+
+#ifdef CONFIG_PM
+
+static int nmi_suspend(struct sys_device *dev, u32 state)
+{
+	if (nmi_enabled == 1)
+		nmi_stop();
+	return 0;
+}
+
+
+static int nmi_resume(struct sys_device *dev)
+{
+	if (nmi_enabled == 1)
+		nmi_start();
+	return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+	set_kset_name("oprofile"),
+	.resume		= nmi_resume,
+	.suspend	= nmi_suspend,
+};
+
+
+static struct sys_device device_oprofile = {
+	.id	= 0,
+	.cls	= &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+	int error;
+	if (!(error = sysdev_class_register(&oprofile_sysclass)))
+		error = sysdev_register(&device_oprofile);
+	return error;
+}
+
+
+static void exit_driverfs(void)
+{
+	sysdev_unregister(&device_oprofile);
+	sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+
+static int nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return model->check_ctrs(regs, &cpu_msrs[cpu]);
+}
+ 
+ 
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrs; ++i) {
+		rdmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrls; ++i) {
+		rdmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+}
+
+
+static void nmi_save_registers(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->fill_in_addresses(msrs);
+	nmi_cpu_save_registers(msrs);
+}
+
+
+static void free_msrs(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		kfree(cpu_msrs[i].counters);
+		cpu_msrs[i].counters = NULL;
+		kfree(cpu_msrs[i].controls);
+		cpu_msrs[i].controls = NULL;
+	}
+}
+
+
+static int allocate_msrs(void)
+{
+	int success = 1;
+	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (!cpu_online(i))
+			continue;
+
+		cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+		if (!cpu_msrs[i].counters) {
+			success = 0;
+			break;
+		}
+		cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+		if (!cpu_msrs[i].controls) {
+			success = 0;
+			break;
+		}
+	}
+
+	if (!success)
+		free_msrs();
+
+	return success;
+}
+
+
+static void nmi_cpu_setup(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	spin_lock(&oprofilefs_lock);
+	model->setup_ctrs(msrs);
+	spin_unlock(&oprofilefs_lock);
+	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+
+static int nmi_setup(void)
+{
+	if (!allocate_msrs())
+		return -ENOMEM;
+
+	/* We walk a thin line between law and rape here.
+	 * We need to be careful to install our NMI handler
+	 * without actually triggering any NMIs as this will
+	 * break the core code horrifically.
+	 */
+	if (reserve_lapic_nmi() < 0) {
+		free_msrs();
+		return -EBUSY;
+	}
+	/* We need to serialize save and setup for HT because the subset
+	 * of msrs are distinct for save and setup operations
+	 */
+	on_each_cpu(nmi_save_registers, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	set_nmi_callback(nmi_callback);
+	nmi_enabled = 1;
+	return 0;
+}
+
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrls; ++i) {
+		wrmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrs; ++i) {
+		wrmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+}
+ 
+
+static void nmi_cpu_shutdown(void * dummy)
+{
+	unsigned int v;
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+ 
+	/* restoring APIC_LVTPC can trigger an apic error because the delivery
+	 * mode and vector nr combination can be illegal. That's by design: on
+	 * power on apic lvt contain a zero vector nr which are legal only for
+	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
+	 */
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+	apic_write(APIC_LVTERR, v);
+	nmi_restore_registers(msrs);
+}
+
+ 
+static void nmi_shutdown(void)
+{
+	nmi_enabled = 0;
+	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	unset_nmi_callback();
+	release_lapic_nmi();
+	free_msrs();
+}
+
+ 
+static void nmi_cpu_start(void * dummy)
+{
+	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	model->start(msrs);
+}
+ 
+
+static int nmi_start(void)
+{
+	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	return 0;
+}
+ 
+ 
+static void nmi_cpu_stop(void * dummy)
+{
+	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	model->stop(msrs);
+}
+ 
+ 
+static void nmi_stop(void)
+{
+	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int nmi_create_files(struct super_block * sb, struct dentry * root)
+{
+	unsigned int i;
+
+	for (i = 0; i < model->num_counters; ++i) {
+		struct dentry * dir;
+		char buf[2];
+ 
+		snprintf(buf, 2, "%d", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); 
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 
+	}
+
+	return 0;
+}
+ 
+ 
+static int __init p4_init(char ** cpu_type)
+{
+	__u8 cpu_model = boot_cpu_data.x86_model;
+
+	if (cpu_model > 4)
+		return 0;
+
+#ifndef CONFIG_SMP
+	*cpu_type = "i386/p4";
+	model = &op_p4_spec;
+	return 1;
+#else
+	switch (smp_num_siblings) {
+		case 1:
+			*cpu_type = "i386/p4";
+			model = &op_p4_spec;
+			return 1;
+
+		case 2:
+			*cpu_type = "i386/p4-ht";
+			model = &op_p4_ht2_spec;
+			return 1;
+	}
+#endif
+
+	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
+	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
+	return 0;
+}
+
+
+static int __init ppro_init(char ** cpu_type)
+{
+	__u8 cpu_model = boot_cpu_data.x86_model;
+
+	if (cpu_model > 0xd)
+		return 0;
+
+	if (cpu_model == 9) {
+		*cpu_type = "i386/p6_mobile";
+	} else if (cpu_model > 5) {
+		*cpu_type = "i386/piii";
+	} else if (cpu_model > 2) {
+		*cpu_type = "i386/pii";
+	} else {
+		*cpu_type = "i386/ppro";
+	}
+
+	model = &op_ppro_spec;
+	return 1;
+}
+
+/* in order to get driverfs right */
+static int using_nmi;
+
+int __init nmi_init(struct oprofile_operations *ops)
+{
+	__u8 vendor = boot_cpu_data.x86_vendor;
+	__u8 family = boot_cpu_data.x86;
+	char *cpu_type;
+
+	if (!cpu_has_apic)
+		return -ENODEV;
+ 
+	switch (vendor) {
+		case X86_VENDOR_AMD:
+			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+			switch (family) {
+			default:
+				return -ENODEV;
+			case 6:
+				model = &op_athlon_spec;
+				cpu_type = "i386/athlon";
+				break;
+			case 0xf:
+				model = &op_athlon_spec;
+				/* Actually it could be i386/hammer too, but give
+				   user space an consistent name. */
+				cpu_type = "x86-64/hammer";
+				break;
+			}
+			break;
+ 
+		case X86_VENDOR_INTEL:
+			switch (family) {
+				/* Pentium IV */
+				case 0xf:
+					if (!p4_init(&cpu_type))
+						return -ENODEV;
+					break;
+
+				/* A P6-class processor */
+				case 6:
+					if (!ppro_init(&cpu_type))
+						return -ENODEV;
+					break;
+
+				default:
+					return -ENODEV;
+			}
+			break;
+
+		default:
+			return -ENODEV;
+	}
+
+	init_driverfs();
+	using_nmi = 1;
+	ops->create_files = nmi_create_files;
+	ops->setup = nmi_setup;
+	ops->shutdown = nmi_shutdown;
+	ops->start = nmi_start;
+	ops->stop = nmi_stop;
+	ops->cpu_type = cpu_type;
+	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+	return 0;
+}
+
+
+void nmi_exit(void)
+{
+	if (using_nmi)
+		exit_driverfs();
+}
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c
new file mode 100644
index 00000000000..b2e462abf33
--- /dev/null
+++ b/arch/i386/oprofile/nmi_timer_int.c
@@ -0,0 +1,55 @@
+/**
+ * @file nmi_timer_int.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Zwane Mwaikambo <zwane@linuxpower.ca>
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/oprofile.h>
+#include <linux/rcupdate.h>
+
+
+#include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/ptrace.h>
+ 
+static int nmi_timer_callback(struct pt_regs * regs, int cpu)
+{
+	oprofile_add_sample(regs, 0);
+	return 1;
+}
+
+static int timer_start(void)
+{
+	disable_timer_nmi_watchdog();
+	set_nmi_callback(nmi_timer_callback);
+	return 0;
+}
+
+
+static void timer_stop(void)
+{
+	enable_timer_nmi_watchdog();
+	unset_nmi_callback();
+	synchronize_kernel();
+}
+
+
+int __init nmi_timer_init(struct oprofile_operations * ops)
+{
+	extern int nmi_active;
+
+	if (nmi_active <= 0)
+		return -ENODEV;
+
+	ops->start = timer_start;
+	ops->stop = timer_stop;
+	ops->cpu_type = "timer";
+	printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
+	return 0;
+}
diff --git a/arch/i386/oprofile/op_counter.h b/arch/i386/oprofile/op_counter.h
new file mode 100644
index 00000000000..2880b15c467
--- /dev/null
+++ b/arch/i386/oprofile/op_counter.h
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
new file mode 100644
index 00000000000..3ad9a72a503
--- /dev/null
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -0,0 +1,149 @@
+/**
+ * @file op_model_athlon.h
+ * athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+	msrs->counters[0].addr = MSR_K7_PERFCTR0;
+	msrs->counters[1].addr = MSR_K7_PERFCTR1;
+	msrs->counters[2].addr = MSR_K7_PERFCTR2;
+	msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+}
+
+ 
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+ 
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+	
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR(low);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT(low, counter_config[i].event);
+			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+ 
+static int athlon_check_ctrs(struct pt_regs * const regs,
+			     struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			oprofile_add_sample(regs, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+		}
+	}
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+ 
+static void athlon_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (reset_value[i]) {
+			CTRL_READ(low, high, msrs, i);
+			CTRL_SET_ACTIVE(low);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low,high;
+	int i;
+
+	/* Subtle: stop on all counters to avoid race with
+	 * setting our pm callback */
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_SET_INACTIVE(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+}
+
+
+struct op_x86_model_spec const op_athlon_spec = {
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &athlon_fill_in_addresses,
+	.setup_ctrs = &athlon_setup_ctrs,
+	.check_ctrs = &athlon_check_ctrs,
+	.start = &athlon_start,
+	.stop = &athlon_stop
+};
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
new file mode 100644
index 00000000000..ac8a066035c
--- /dev/null
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -0,0 +1,725 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_num_siblings == 2)
+		num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+	return smp_num_siblings == 2 ? 2 : 1;
+#else
+	return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+	int virt_counter;
+	int counter_address;
+	int cccr_address;
+};
+
+struct p4_event_binding {
+	int escr_select;  /* value to put in CCCR */
+	int event_select; /* value to put in ESCR */
+	struct {
+		int virt_counter; /* for this counter... */
+		int escr_address; /* use this ESCR       */
+	} bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+	
+	{ /* BRANCH_RETIRED */
+		0x05, 0x06, 
+		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+	
+	{ /* MISPRED_BRANCH_RETIRED */
+		0x04, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+	
+	{ /* TC_DELIVER_MODE */
+		0x01, 0x01,
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
+	},
+	
+	{ /* BPU_FETCH_REQUEST */
+		0x00, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+	},
+
+	{ /* ITLB_REFERENCE */
+		0x03, 0x18,
+		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+	},
+
+	{ /* MEMORY_CANCEL */
+		0x05, 0x02,
+		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+	},
+
+	{ /* MEMORY_COMPLETE */
+		0x02, 0x08,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* LOAD_PORT_REPLAY */
+		0x02, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* STORE_PORT_REPLAY */
+		0x02, 0x05,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* MOB_LOAD_REPLAY */
+		0x02, 0x03,
+		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+	},
+
+	{ /* PAGE_WALK_TYPE */
+		0x04, 0x01,
+		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+	},
+
+	{ /* BSQ_CACHE_REFERENCE */
+		0x07, 0x0c, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+	},
+
+	{ /* IOQ_ALLOCATION */
+		0x06, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* IOQ_ACTIVE_ENTRIES */
+		0x06, 0x1a, 
+		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+		  { 0, 0 } }
+	},
+
+	{ /* FSB_DATA_ACTIVITY */
+		0x06, 0x17, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+
+	{ /* BSQ_ALLOCATION */
+		0x07, 0x05, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* BSQ_ACTIVE_ENTRIES */
+		0x07, 0x06,
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		  { 0, 0 } }
+	},
+
+	{ /* X87_ASSIST */
+		0x05, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* SSE_INPUT_ASSIST */
+		0x01, 0x34,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_SP_UOP */
+		0x01, 0x08, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_DP_UOP */
+		0x01, 0x0c, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_SP_UOP */
+		0x01, 0x0a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_DP_UOP */
+		0x01, 0x0e,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* 64BIT_MMX_UOP */
+		0x01, 0x02, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* 128BIT_MMX_UOP */
+		0x01, 0x1a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* X87_FP_UOP */
+		0x01, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* X87_SIMD_MOVES_UOP */
+		0x01, 0x2e, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* MACHINE_CLEAR */
+		0x05, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* GLOBAL_POWER_EVENTS */
+		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+  
+	{ /* TC_MS_XFER */
+		0x00, 0x05, 
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* UOP_QUEUE_WRITES */
+		0x00, 0x09,
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* FRONT_END_EVENT */
+		0x05, 0x08,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* EXECUTION_EVENT */
+		0x05, 0x0c,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* REPLAY_EVENT */
+		0x05, 0x09,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* INSTR_RETIRED */
+		0x04, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOPS_RETIRED */
+		0x04, 0x01,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOP_TYPE */    
+		0x02, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+	},
+
+	{ /* RETIRED_MISPRED_BRANCH_TYPE */
+		0x02, 0x05, 
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	},
+
+	{ /* RETIRED_BRANCH_TYPE */
+		0x02, 0x04,
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	}
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+	int cpu = smp_processor_id();
+	return (cpu != first_cpu(cpu_sibling_map[cpu]));
+#endif	
+	return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter numer to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+	unsigned int i; 
+	unsigned int addr, stag;
+
+	setup_num_counters();
+	stag = get_stagger();
+
+	/* the counter registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		msrs->counters[i].addr = 
+			p4_counters[VIRT_CTR(stag, i)].counter_address;
+	}
+
+	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+	/* 18 CCCR registers */
+	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	/* 43 ESCR registers in three or four discontiguous group */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+
+	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
+	 * to avoid special case in nmi_{save|restore}_registers() */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		for (addr = MSR_P4_BSU_ESCR0 + stag;
+		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	} else {
+		for (addr = MSR_P4_IQ_ESCR0 + stag;
+		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+
+	/* there are 2 remaining non-contiguously located ESCRs */
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		/* standard non-HT CPUs handle both remaining ESCRs*/
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else if (stag == 0) {
+		/* HT CPUs give the first remainder to the even thread, as
+		   the 32nd control register */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else {
+		/* and two copies of the second to the odd thread,
+		   for the 22st and 23nd control registers */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+	}
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+	int i;
+	int const maxbind = 2;
+	unsigned int cccr = 0;
+	unsigned int escr = 0;
+	unsigned int high = 0;
+	unsigned int counter_bit;
+	struct p4_event_binding *ev = NULL;
+	unsigned int stag;
+
+	stag = get_stagger();
+	
+	/* convert from counter *number* to counter *bit* */
+	counter_bit = 1 << VIRT_CTR(stag, ctr);
+	
+	/* find our event binding structure. */
+	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+		printk(KERN_ERR 
+		       "oprofile: P4 event code 0x%lx out of range\n", 
+		       counter_config[ctr].event);
+		return;
+	}
+	
+	ev = &(p4_events[counter_config[ctr].event - 1]);
+	
+	for (i = 0; i < maxbind; i++) {
+		if (ev->bindings[i].virt_counter & counter_bit) {
+
+			/* modify ESCR */
+			ESCR_READ(escr, high, ev, i);
+			ESCR_CLEAR(escr);
+			if (stag == 0) {
+				ESCR_SET_USR_0(escr, counter_config[ctr].user);
+				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+			} else {
+				ESCR_SET_USR_1(escr, counter_config[ctr].user);
+				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+			}
+			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_WRITE(escr, high, ev, i);
+		       
+			/* modify CCCR */
+			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			CCCR_CLEAR(cccr);
+			CCCR_SET_REQUIRED_BITS(cccr);
+			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+			if (stag == 0) {
+				CCCR_SET_PMI_OVF_0(cccr);
+			} else {
+				CCCR_SET_PMI_OVF_1(cccr);
+			}
+			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			return;
+		}
+	}
+
+	printk(KERN_ERR 
+	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+	       counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int i;
+	unsigned int low, high;
+	unsigned int addr;
+	unsigned int stag;
+
+	stag = get_stagger();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+	if (! MISC_PMC_ENABLED_P(low)) {
+		printk(KERN_ERR "oprofile: P4 PMC not available\n");
+		return;
+	}
+
+	/* clear the cccrs we will use */
+	for (i = 0 ; i < num_counters ; i++) {
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+	}
+
+	/* clear cccrs outside our concern */
+	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+		rdmsr(p4_unused_cccr[i], low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_unused_cccr[i], low, high);
+	}
+
+	/* clear all escrs (including those outside our concern) */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
+	if (boot_cpu_data.x86_model < 0x3) {
+		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	} else if (stag == 0) {
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+	} else {
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	}		
+	
+	/* setup all counters */
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+			pmc_setup_one_p4_counter(i);
+			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+
+static int p4_check_ctrs(struct pt_regs * const regs,
+			 struct op_msrs const * const msrs)
+{
+	unsigned long ctr, low, high, stag, real;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		
+		if (!reset_value[i]) 
+			continue;
+
+		/* 
+		 * there is some eccentricity in the hardware which
+		 * requires that we perform 2 extra corrections:
+		 *
+		 * - check both the CCCR:OVF flag for overflow and the
+		 *   counter high bit for un-flagged overflows.
+		 *
+		 * - write the counter back twice to ensure it gets
+		 *   updated properly.
+		 * 
+		 * the former seems to be related to extra NMIs happening
+		 * during the current NMI; the latter is reported as errata
+		 * N15 in intel doc 249199-029, pentium 4 specification
+		 * update, though their suggested work-around does not
+		 * appear to solve the problem.
+		 */
+		
+		real = VIRT_CTR(stag, i);
+
+		CCCR_READ(low, high, real);
+ 		CTR_READ(ctr, high, real);
+		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+			oprofile_add_sample(regs, i);
+ 			CTR_WRITE(reset_value[i], real);
+			CCCR_CLEAR_OVF(low);
+			CCCR_WRITE(low, high, real);
+ 			CTR_WRITE(reset_value[i], real);
+		}
+	}
+
+	/* P4 quirk: you have to re-unmask the apic vector */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_ENABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_DISABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+	.num_counters = NUM_COUNTERS_HT2,
+	.num_controls = NUM_CONTROLS_HT2,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+	.num_counters = NUM_COUNTERS_NON_HT,
+	.num_controls = NUM_CONTROLS_NON_HT,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
new file mode 100644
index 00000000000..d719015fc04
--- /dev/null
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -0,0 +1,143 @@
+/**
+ * @file op_model_ppro.h
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+	msrs->counters[0].addr = MSR_P6_PERFCTR0;
+	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	
+	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+	
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR(low);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT(low, counter_config[i].event);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+}
+
+ 
+static int ppro_check_ctrs(struct pt_regs * const regs,
+			   struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+ 
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			oprofile_add_sample(regs, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+		}
+	}
+
+	/* Only P6 based Pentium M need to re-unmask the apic vector but it
+	 * doesn't hurt other P6 variant */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* We can't work out if we really handled an interrupt. We
+	 * might have caught a *second* counter just after overflowing
+	 * the interrupt for this counter then arrives
+	 * and we don't find a counter that's overflowed, so we
+	 * would return 0 and get dazed + confused. Instead we always
+	 * assume we found an overflow. This sucks.
+	 */
+	return 1;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+	unsigned int low,high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_ACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low,high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_INACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop
+};
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
new file mode 100644
index 00000000000..123b7e90a9e
--- /dev/null
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -0,0 +1,50 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+	unsigned int high;
+	unsigned int low;
+};
+
+struct op_msr {
+	unsigned long addr;
+	struct op_saved_msr saved;
+};
+
+struct op_msrs {
+	struct op_msr * counters;
+	struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ */
+struct op_x86_model_spec {
+	unsigned int const num_counters;
+	unsigned int const num_controls;
+	void (*fill_in_addresses)(struct op_msrs * const msrs);
+	void (*setup_ctrs)(struct op_msrs const * const msrs);
+	int (*check_ctrs)(struct pt_regs * const regs,
+		struct op_msrs const * const msrs);
+	void (*start)(struct op_msrs const * const msrs);
+	void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile
new file mode 100644
index 00000000000..1bff03f3696
--- /dev/null
+++ b/arch/i386/pci/Makefile
@@ -0,0 +1,14 @@
+obj-y				:= i386.o
+
+obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
+obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+
+pci-y				:= fixup.o
+pci-$(CONFIG_ACPI_PCI)		+= acpi.o
+pci-y				+= legacy.o irq.o
+
+pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
+pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
+
+obj-y				+= $(pci-y) common.o
diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
new file mode 100644
index 00000000000..2db65ec45dc
--- /dev/null
+++ b/arch/i386/pci/acpi.c
@@ -0,0 +1,53 @@
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/hw_irq.h>
+#include "pci.h"
+
+struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
+{
+	if (domain != 0) {
+		printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+		return NULL;
+	}
+
+	return pcibios_scan_root(busnum);
+}
+
+extern int pci_routeirq;
+static int __init pci_acpi_init(void)
+{
+	struct pci_dev *dev = NULL;
+
+	if (pcibios_scanned)
+		return 0;
+
+	if (acpi_noirq)
+		return 0;
+
+	printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+	acpi_irq_penalty_init();
+	pcibios_scanned++;
+	pcibios_enable_irq = acpi_pci_irq_enable;
+
+	if (pci_routeirq) {
+		/*
+		 * PCI IRQ routing is set up by pci_enable_device(), but we
+		 * also do it here in case there are still broken drivers that
+		 * don't use pci_enable_device().
+		 */
+		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL)
+			acpi_pci_irq_enable(dev);
+	} else
+		printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\".  If it helps, post a report\n");
+
+#ifdef CONFIG_X86_IO_APIC
+	if (acpi_ioapic)
+		print_IO_APIC();
+#endif
+
+	return 0;
+}
+subsys_initcall(pci_acpi_init);
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
new file mode 100644
index 00000000000..720975e1af5
--- /dev/null
+++ b/arch/i386/pci/common.c
@@ -0,0 +1,251 @@
+/*
+ *	Low-Level PCI Support for PC
+ *
+ *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+
+#include <asm/acpi.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+
+#include "pci.h"
+
+#ifdef CONFIG_PCI_BIOS
+extern  void pcibios_sort(void);
+#endif
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+				PCI_PROBE_MMCONF;
+
+int pci_routeirq;
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus = NULL;
+struct pci_raw_ops *raw_pci_ops;
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+{
+	return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+{
+	return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+}
+
+struct pci_ops pci_root_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+/*
+ * legacy, numa, and acpi all want to call pcibios_scan_root
+ * from their initcalls. This flag prevents that.
+ */
+int pcibios_scanned;
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+DEFINE_SPINLOCK(pci_config_lock);
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+	struct list_head *ln, *mn;
+	struct pci_dev *d, *e;
+	int mirror = PCI_DEVFN(16,0);
+	int seen_host_bridge = 0;
+	int i;
+
+	DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+	list_for_each(ln, &b->devices) {
+		d = pci_dev_b(ln);
+		if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+			seen_host_bridge++;
+		for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+			e = pci_dev_b(mn);
+			if (e->devfn != d->devfn + mirror ||
+			    e->vendor != d->vendor ||
+			    e->device != d->device ||
+			    e->class != d->class)
+				continue;
+			for(i=0; i<PCI_NUM_RESOURCES; i++)
+				if (e->resource[i].start != d->resource[i].start ||
+				    e->resource[i].end != d->resource[i].end ||
+				    e->resource[i].flags != d->resource[i].flags)
+					continue;
+			break;
+		}
+		if (mn == &b->devices)
+			return;
+	}
+	if (!seen_host_bridge)
+		return;
+	printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+	ln = &b->devices;
+	while (ln->next != &b->devices) {
+		d = pci_dev_b(ln->next);
+		if (d->devfn >= mirror) {
+			list_del(&d->global_list);
+			list_del(&d->bus_list);
+			kfree(d);
+		} else
+			ln = ln->next;
+	}
+}
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+
+void __devinit  pcibios_fixup_bus(struct pci_bus *b)
+{
+	pcibios_fixup_ghosts(b);
+	pci_read_bridge_bases(b);
+}
+
+
+struct pci_bus * __devinit pcibios_scan_root(int busnum)
+{
+	struct pci_bus *bus = NULL;
+
+	while ((bus = pci_find_next_bus(bus)) != NULL) {
+		if (bus->number == busnum) {
+			/* Already scanned */
+			return bus;
+		}
+	}
+
+	printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
+
+	return pci_scan_bus(busnum, &pci_root_ops, NULL);
+}
+
+extern u8 pci_cache_line_size;
+
+static int __init pcibios_init(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (!raw_pci_ops) {
+		printk("PCI: System does not support PCI\n");
+		return 0;
+	}
+
+	/*
+	 * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8
+	 * and P4. It's also good for 386/486s (which actually have 16)
+	 * as quite a few PCI devices do not support smaller values.
+	 */
+	pci_cache_line_size = 32 >> 2;
+	if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
+		pci_cache_line_size = 64 >> 2;	/* K7 & K8 */
+	else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
+		pci_cache_line_size = 128 >> 2;	/* P4 */
+
+	pcibios_resource_survey();
+
+#ifdef CONFIG_PCI_BIOS
+	if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+		pcibios_sort();
+#endif
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+char * __devinit  pcibios_setup(char *str)
+{
+	if (!strcmp(str, "off")) {
+		pci_probe = 0;
+		return NULL;
+	}
+#ifdef CONFIG_PCI_BIOS
+	else if (!strcmp(str, "bios")) {
+		pci_probe = PCI_PROBE_BIOS;
+		return NULL;
+	} else if (!strcmp(str, "nobios")) {
+		pci_probe &= ~PCI_PROBE_BIOS;
+		return NULL;
+	} else if (!strcmp(str, "nosort")) {
+		pci_probe |= PCI_NO_SORT;
+		return NULL;
+	} else if (!strcmp(str, "biosirq")) {
+		pci_probe |= PCI_BIOS_IRQ_SCAN;
+		return NULL;
+	}
+#endif
+#ifdef CONFIG_PCI_DIRECT
+	else if (!strcmp(str, "conf1")) {
+		pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+		return NULL;
+	}
+	else if (!strcmp(str, "conf2")) {
+		pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+		return NULL;
+	}
+#endif
+#ifdef CONFIG_PCI_MMCONFIG
+	else if (!strcmp(str, "nommconf")) {
+		pci_probe &= ~PCI_PROBE_MMCONF;
+		return NULL;
+	}
+#endif
+	else if (!strcmp(str, "noacpi")) {
+		acpi_noirq_set();
+		return NULL;
+	}
+#ifndef CONFIG_X86_VISWS
+	else if (!strcmp(str, "usepirqmask")) {
+		pci_probe |= PCI_USE_PIRQ_MASK;
+		return NULL;
+	} else if (!strncmp(str, "irqmask=", 8)) {
+		pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+		return NULL;
+	} else if (!strncmp(str, "lastbus=", 8)) {
+		pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+		return NULL;
+	}
+#endif
+	else if (!strcmp(str, "rom")) {
+		pci_probe |= PCI_ASSIGN_ROMS;
+		return NULL;
+	} else if (!strcmp(str, "assign-busses")) {
+		pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+		return NULL;
+	} else if (!strcmp(str, "routeirq")) {
+		pci_routeirq = 1;
+		return NULL;
+	}
+	return str;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+	return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	int err;
+
+	if ((err = pcibios_enable_resources(dev, mask)) < 0)
+		return err;
+
+	return pcibios_enable_irq(dev);
+}
diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c
new file mode 100644
index 00000000000..30b7e9b4f6a
--- /dev/null
+++ b/arch/i386/pci/direct.c
@@ -0,0 +1,289 @@
+/*
+ * direct.c - Low-level direct PCI config space access
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+static int pci_conf1_read(unsigned int seg, unsigned int bus,
+			  unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);
+
+	switch (len) {
+	case 1:
+		*value = inb(0xCFC + (reg & 3));
+		break;
+	case 2:
+		*value = inw(0xCFC + (reg & 2));
+		break;
+	case 4:
+		*value = inl(0xCFC);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_conf1_write(unsigned int seg, unsigned int bus,
+			   unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 255)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);
+
+	switch (len) {
+	case 1:
+		outb((u8)value, 0xCFC + (reg & 3));
+		break;
+	case 2:
+		outw((u16)value, 0xCFC + (reg & 2));
+		break;
+	case 4:
+		outl((u32)value, 0xCFC);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+#undef PCI_CONF1_ADDRESS
+
+struct pci_raw_ops pci_direct_conf1 = {
+	.read =		pci_conf1_read,
+	.write =	pci_conf1_write,
+};
+
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
+#define PCI_CONF2_ADDRESS(dev, reg)	(u16)(0xC000 | (dev << 8) | reg)
+
+static int pci_conf2_read(unsigned int seg, unsigned int bus,
+			  unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+	int dev, fn;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
+		return -EINVAL;
+
+	dev = PCI_SLOT(devfn);
+	fn = PCI_FUNC(devfn);
+
+	if (dev & 0x10) 
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+	outb((u8)bus, 0xCFA);
+
+	switch (len) {
+	case 1:
+		*value = inb(PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	case 2:
+		*value = inw(PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	case 4:
+		*value = inl(PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	}
+
+	outb(0, 0xCF8);
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_conf2_write(unsigned int seg, unsigned int bus,
+			   unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+	int dev, fn;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 255)) 
+		return -EINVAL;
+
+	dev = PCI_SLOT(devfn);
+	fn = PCI_FUNC(devfn);
+
+	if (dev & 0x10) 
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+	outb((u8)bus, 0xCFA);
+
+	switch (len) {
+	case 1:
+		outb((u8)value, PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	case 2:
+		outw((u16)value, PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	case 4:
+		outl((u32)value, PCI_CONF2_ADDRESS(dev, reg));
+		break;
+	}
+
+	outb(0, 0xCF8);    
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+#undef PCI_CONF2_ADDRESS
+
+static struct pci_raw_ops pci_direct_conf2 = {
+	.read =		pci_conf2_read,
+	.write =	pci_conf2_write,
+};
+
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __init pci_sanity_check(struct pci_raw_ops *o)
+{
+	u32 x = 0;
+	int devfn;
+
+	if (pci_probe & PCI_NO_CHECKS)
+		return 1;
+
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x))
+			continue;
+		if (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)
+			return 1;
+
+		if (o->read(0, 0, devfn, PCI_VENDOR_ID, 2, &x))
+			continue;
+		if (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)
+			return 1;
+	}
+
+	DBG("PCI: Sanity check failed\n");
+	return 0;
+}
+
+static int __init pci_check_type1(void)
+{
+	unsigned long flags;
+	unsigned int tmp;
+	int works = 0;
+
+	local_irq_save(flags);
+
+	outb(0x01, 0xCFB);
+	tmp = inl(0xCF8);
+	outl(0x80000000, 0xCF8);
+	if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) {
+		works = 1;
+	}
+	outl(tmp, 0xCF8);
+	local_irq_restore(flags);
+
+	return works;
+}
+
+static int __init pci_check_type2(void)
+{
+	unsigned long flags;
+	int works = 0;
+
+	local_irq_save(flags);
+
+	outb(0x00, 0xCFB);
+	outb(0x00, 0xCF8);
+	outb(0x00, 0xCFA);
+	if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 &&
+	    pci_sanity_check(&pci_direct_conf2)) {
+		works = 1;
+	}
+
+	local_irq_restore(flags);
+
+	return works;
+}
+
+static int __init pci_direct_init(void)
+{
+	struct resource *region, *region2;
+
+	if ((pci_probe & PCI_PROBE_CONF1) == 0)
+		goto type2;
+	region = request_region(0xCF8, 8, "PCI conf1");
+	if (!region)
+		goto type2;
+
+	if (pci_check_type1()) {
+		printk(KERN_INFO "PCI: Using configuration type 1\n");
+		raw_pci_ops = &pci_direct_conf1;
+		return 0;
+	}
+	release_resource(region);
+
+ type2:
+	if ((pci_probe & PCI_PROBE_CONF2) == 0)
+		goto out;
+	region = request_region(0xCF8, 4, "PCI conf2");
+	if (!region)
+		goto out;
+	region2 = request_region(0xC000, 0x1000, "PCI conf2");
+	if (!region2)
+		goto fail2;
+
+	if (pci_check_type2()) {
+		printk(KERN_INFO "PCI: Using configuration type 2\n");
+		raw_pci_ops = &pci_direct_conf2;
+		return 0;
+	}
+
+	release_resource(region2);
+ fail2:
+	release_resource(region);
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_direct_init);
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
new file mode 100644
index 00000000000..be52c5ac4e0
--- /dev/null
+++ b/arch/i386/pci/fixup.c
@@ -0,0 +1,386 @@
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+	/*
+	 * i450NX -- Find and scan all secondary buses on all PXB's.
+	 */
+	int pxb, reg;
+	u8 busno, suba, subb;
+
+	printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
+	reg = 0xd0;
+	for(pxb=0; pxb<2; pxb++) {
+		pci_read_config_byte(d, reg++, &busno);
+		pci_read_config_byte(d, reg++, &suba);
+		pci_read_config_byte(d, reg++, &subb);
+		DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+		if (busno)
+			pci_scan_bus(busno, &pci_root_ops, NULL);	/* Bus A */
+		if (suba < subb)
+			pci_scan_bus(suba+1, &pci_root_ops, NULL);	/* Bus B */
+	}
+	pcibios_last_bus = -1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
+
+static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+{
+	/*
+	 * i450GX and i450KX -- Find and scan all secondary buses.
+	 * (called separately for each PCI bridge found)
+	 */
+	u8 busno;
+	pci_read_config_byte(d, 0x4a, &busno);
+	printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno);
+	pci_scan_bus(busno, &pci_root_ops, NULL);
+	pcibios_last_bus = -1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
+
+static void __devinit  pci_fixup_umc_ide(struct pci_dev *d)
+{
+	/*
+	 * UM8886BF IDE controller sets region type bits incorrectly,
+	 * therefore they look like memory despite of them being I/O.
+	 */
+	int i;
+
+	printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d));
+	for(i=0; i<4; i++)
+		d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
+
+static void __devinit  pci_fixup_ncr53c810(struct pci_dev *d)
+{
+	/*
+	 * NCR 53C810 returns class code 0 (at least on some systems).
+	 * Fix class to be PCI_CLASS_STORAGE_SCSI
+	 */
+	if (!d->class) {
+		printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", pci_name(d));
+		d->class = PCI_CLASS_STORAGE_SCSI << 8;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
+
+static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
+{
+	int i;
+
+	/*
+	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
+	 */
+	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+	DBG("PCI: IDE base address fixup for %s\n", pci_name(d));
+	for(i=0; i<4; i++) {
+		struct resource *r = &d->resource[i];
+		if ((r->start & ~0x80) == 0x374) {
+			r->start |= 2;
+			r->end = r->start;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
+
+static void __devinit  pci_fixup_ide_trash(struct pci_dev *d)
+{
+	int i;
+
+	/*
+	 * Runs the fixup only for the first IDE controller
+	 * (Shai Fultheim - shai@ftcon.com)
+	 */
+	static int called = 0;
+	if (called)
+		return;
+	called = 1;
+
+	/*
+	 * There exist PCI IDE controllers which have utter garbage
+	 * in first four base registers. Ignore that.
+	 */
+	DBG("PCI: IDE base address trash cleared for %s\n", pci_name(d));
+	for(i=0; i<4; i++)
+		d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, pci_fixup_ide_trash);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_11, pci_fixup_ide_trash);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_9, pci_fixup_ide_trash);
+
+static void __devinit  pci_fixup_latency(struct pci_dev *d)
+{
+	/*
+	 *  SiS 5597 and 5598 chipsets require latency timer set to
+	 *  at most 32 to avoid lockups.
+	 */
+	DBG("PCI: Setting max latency to 32\n");
+	pcibios_max_latency = 32;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency);
+
+static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+{
+	/*
+	 * PIIX4 ACPI device: hardwired IRQ9
+	 */
+	d->irq = 9;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi);
+
+/*
+ * Addresses issues with problems in the memory write queue timer in
+ * certain VIA Northbridges.  This bugfix is per VIA's specifications,
+ * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
+ * to trigger a bug in its integrated ProSavage video card, which
+ * causes screen corruption.  We only clear bits 6 and 7 for that chipset,
+ * until VIA can provide us with definitive information on why screen
+ * corruption occurs, and what exactly those bits do.
+ *
+ * VIA 8363,8622,8361 Northbridges:
+ *  - bits  5, 6, 7 at offset 0x55 need to be turned off
+ * VIA 8367 (KT266x) Northbridges:
+ *  - bits  5, 6, 7 at offset 0x95 need to be turned off
+ * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
+ *  - bits     6, 7 at offset 0x55 need to be turned off
+ */
+
+#define VIA_8363_KL133_REVISION_ID 0x81
+#define VIA_8363_KM133_REVISION_ID 0x84
+
+static void __devinit pci_fixup_via_northbridge_bug(struct pci_dev *d)
+{
+	u8 v;
+	u8 revision;
+	int where = 0x55;
+	int mask = 0x1f; /* clear bits 5, 6, 7 by default */
+
+	pci_read_config_byte(d, PCI_REVISION_ID, &revision);
+
+	if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
+		/* fix pci bus latency issues resulted by NB bios error
+		   it appears on bug free^Wreduced kt266x's bios forces
+		   NB latency to zero */
+		pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
+
+		where = 0x95; /* the memory write queue timer register is 
+				different for the KT266x's: 0x95 not 0x55 */
+	} else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
+			(revision == VIA_8363_KL133_REVISION_ID || 
+			revision == VIA_8363_KM133_REVISION_ID)) {
+			mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
+					causes screen corruption on the KL133/KM133 */
+	}
+
+	pci_read_config_byte(d, where, &v);
+	if (v & ~mask) {
+		printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
+			d->device, revision, where, v, mask, v & mask);
+		v &= mask;
+		pci_write_config_byte(d, where, v);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug);
+
+/*
+ * For some reasons Intel decided that certain parts of their
+ * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
+ * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
+ * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
+ * to Intel terminology. These devices do forward all addresses from
+ * system to PCI bus no matter what are their window settings, so they are
+ * "transparent" (or subtractive decoding) from programmers point of view.
+ */
+static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
+{
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+	    (dev->device & 0xff00) == 0x2400)
+		dev->transparent = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge);
+
+/*
+ * Fixup for C1 Halt Disconnect problem on nForce2 systems.
+ *
+ * From information provided by "Allen Martin" <AMartin@nvidia.com>:
+ *
+ * A hang is caused when the CPU generates a very fast CONNECT/HALT cycle
+ * sequence.  Workaround is to set the SYSTEM_IDLE_TIMEOUT to 80 ns.
+ * This allows the state-machine and timer to return to a proper state within
+ * 80 ns of the CONNECT and probe appearing together.  Since the CPU will not
+ * issue another HALT within 80 ns of the initial HALT, the failure condition
+ * is avoided.
+ */
+static void __init pci_fixup_nforce2(struct pci_dev *dev)
+{
+	u32 val;
+
+	/*
+	 * Chip  Old value   New value
+	 * C17   0x1F0FFF01  0x1F01FF01
+	 * C18D  0x9F0FFF01  0x9F01FF01
+	 *
+	 * Northbridge chip version may be determined by
+	 * reading the PCI revision ID (0xC1 or greater is C18D).
+	 */
+	pci_read_config_dword(dev, 0x6c, &val);
+
+	/*
+	 * Apply fixup if needed, but don't touch disconnect state
+	 */
+	if ((val & 0x00FF0000) != 0x00010000) {
+		printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n");
+		pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2);
+
+/* Max PCI Express root ports */
+#define MAX_PCIEROOT	6
+static int quirk_aspm_offset[MAX_PCIEROOT << 3];
+
+#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b)
+
+static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+{
+	return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+}
+
+/*
+ * Replace the original pci bus ops for write with a new one that will filter
+ * the request to insure ASPM cannot be enabled.
+ */
+static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+{
+	u8 offset;
+
+	offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)];
+
+	if ((offset) && (where == offset))
+		value = value & 0xfffffffc;
+	
+	return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+}
+
+static struct pci_ops quirk_pcie_aspm_ops = {
+	.read = quirk_pcie_aspm_read,
+	.write = quirk_pcie_aspm_write,
+};
+
+/*
+ * Prevents PCI Express ASPM (Active State Power Management) being enabled.
+ *
+ * Save the register offset, where the ASPM control bits are located,
+ * for each PCI Express device that is in the device list of
+ * the root port in an array for fast indexing. Replace the bus ops
+ * with the modified one.
+ */
+static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
+{
+	int cap_base, i;
+	struct pci_bus  *pbus;
+	struct pci_dev *dev;
+
+	if ((pbus = pdev->subordinate) == NULL)
+		return;
+
+	/*
+	 * Check if the DID of pdev matches one of the six root ports. This
+	 * check is needed in the case this function is called directly by the
+	 * hot-plug driver.
+	 */
+	if ((pdev->device < PCI_DEVICE_ID_INTEL_MCH_PA) ||
+	    (pdev->device > PCI_DEVICE_ID_INTEL_MCH_PC1))
+		return;
+
+	if (list_empty(&pbus->devices)) {
+		/*
+		 * If no device is attached to the root port at power-up or
+		 * after hot-remove, the pbus->devices is empty and this code
+		 * will set the offsets to zero and the bus ops to parent's bus
+		 * ops, which is unmodified.
+	 	 */
+		for (i= GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
+			quirk_aspm_offset[i] = 0;
+
+		pbus->ops = pbus->parent->ops;
+	} else {
+		/*
+		 * If devices are attached to the root port at power-up or
+		 * after hot-add, the code loops through the device list of
+		 * each root port to save the register offsets and replace the
+		 * bus ops.
+		 */
+		list_for_each_entry(dev, &pbus->devices, bus_list) {
+			/* There are 0 to 8 devices attached to this bus */
+			cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP);
+			quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)]= cap_base + 0x10;
+		}
+		pbus->ops = &quirk_pcie_aspm_ops;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA1,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB1,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC1,	pcie_rootport_aspm_quirk );
+
+/*
+ * Fixup to mark boot BIOS video selected by BIOS before it changes
+ *
+ * From information provided by "Jon Smirl" <jonsmirl@gmail.com>
+ *
+ * The standard boot ROM sequence for an x86 machine uses the BIOS
+ * to select an initial video card for boot display. This boot video 
+ * card will have it's BIOS copied to C0000 in system RAM. 
+ * IORESOURCE_ROM_SHADOW is used to associate the boot video
+ * card with this copy. On laptops this copy has to be used since
+ * the main ROM may be compressed or combined with another image.
+ * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW
+ * is marked here since the boot video device will be the only enabled
+ * video device at this point.
+ */
+
+static void __devinit pci_fixup_video(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+	struct pci_bus *bus;
+	u16 config;
+
+	if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+		return;
+
+	/* Is VGA routed to us? */
+	bus = pdev->bus;
+	while (bus) {
+		bridge = bus->self;
+		if (bridge) {
+			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+						&config);
+			if (!(config & PCI_BRIDGE_CTL_VGA))
+				return;
+		}
+		bus = bus->parent;
+	}
+	pci_read_config_word(pdev, PCI_COMMAND, &config);
+	if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+		pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+		printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev));
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
new file mode 100644
index 00000000000..c205ea7e233
--- /dev/null
+++ b/arch/i386/pci/i386.c
@@ -0,0 +1,304 @@
+/*
+ *	Low-Level PCI Access for i386 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ *      Visionary Computing
+ *      (Unix and Linux consulting and custom programming)
+ *      Drew@Colorado.EDU
+ *      +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ *	iX Multiuser Multitasking Magazine
+ *	Hannover, Germany
+ *	hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+
+#include "pci.h"
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void
+pcibios_align_resource(void *data, struct resource *res,
+		       unsigned long size, unsigned long align)
+{
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long start = res->start;
+
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	}
+}
+
+
+/*
+ *  Handle resources of PCI devices.  If the world were perfect, we could
+ *  just allocate all the resource regions and do nothing more.  It isn't.
+ *  On the other hand, we cannot just re-allocate all devices, as it would
+ *  require us to know lots of host bridge internals.  So we attempt to
+ *  keep as much of the original configuration as possible, but tweak it
+ *  when it's found to be wrong.
+ *
+ *  Known BIOS problems we have to work around:
+ *	- I/O or memory regions not configured
+ *	- regions configured, but not enabled in the command register
+ *	- bogus I/O addresses above 64K used
+ *	- expansion ROMs left enabled (this may sound harmless, but given
+ *	  the fact the PCI specs explicitly allow address decoders to be
+ *	  shared between expansion ROMs and other resource regions, it's
+ *	  at least dangerous)
+ *
+ *  Our solution:
+ *	(1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ *	    This gives us fixed barriers on where we can allocate.
+ *	(2) Allocate resources for all enabled devices.  If there is
+ *	    a collision, just mark the resource as unallocated. Also
+ *	    disable expansion ROMs during this step.
+ *	(3) Try to allocate resources for disabled devices.  If the
+ *	    resources were assigned correctly, everything goes well,
+ *	    if they weren't, they won't disturb allocation of other
+ *	    resources.
+ *	(4) Assign new addresses to resources which were either
+ *	    not configured at all or misconfigured.  If explicitly
+ *	    requested by the user, configure expansion ROM address
+ *	    as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	int idx;
+	struct resource *r, *pr;
+
+	/* Depth-First Search on bus tree */
+	list_for_each_entry(bus, bus_list, node) {
+		if ((dev = bus->self)) {
+			for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+				r = &dev->resource[idx];
+				if (!r->start)
+					continue;
+				pr = pci_find_parent_resource(dev, r);
+				if (!pr || request_resource(pr, r) < 0)
+					printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, pci_name(dev));
+			}
+		}
+		pcibios_allocate_bus_resources(&bus->children);
+	}
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+	struct pci_dev *dev = NULL;
+	int idx, disabled;
+	u16 command;
+	struct resource *r, *pr;
+
+	for_each_pci_dev(dev) {
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		for(idx = 0; idx < 6; idx++) {
+			r = &dev->resource[idx];
+			if (r->parent)		/* Already allocated */
+				continue;
+			if (!r->start)		/* Address not assigned at all */
+				continue;
+			if (r->flags & IORESOURCE_IO)
+				disabled = !(command & PCI_COMMAND_IO);
+			else
+				disabled = !(command & PCI_COMMAND_MEMORY);
+			if (pass == disabled) {
+				DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+				    r->start, r->end, r->flags, disabled, pass);
+				pr = pci_find_parent_resource(dev, r);
+				if (!pr || request_resource(pr, r) < 0) {
+					printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, pci_name(dev));
+					/* We'll assign a new address later */
+					r->end -= r->start;
+					r->start = 0;
+				}
+			}
+		}
+		if (!pass) {
+			r = &dev->resource[PCI_ROM_RESOURCE];
+			if (r->flags & IORESOURCE_ROM_ENABLE) {
+				/* Turn the ROM off, leave the resource region, but keep it unregistered. */
+				u32 reg;
+				DBG("PCI: Switching off ROM of %s\n", pci_name(dev));
+				r->flags &= ~IORESOURCE_ROM_ENABLE;
+				pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+				pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+			}
+		}
+	}
+}
+
+static int __init pcibios_assign_resources(void)
+{
+	struct pci_dev *dev = NULL;
+	int idx;
+	struct resource *r;
+
+	for_each_pci_dev(dev) {
+		int class = dev->class >> 8;
+
+		/* Don't touch classless devices and host bridges */
+		if (!class || class == PCI_CLASS_BRIDGE_HOST)
+			continue;
+
+		for(idx=0; idx<6; idx++) {
+			r = &dev->resource[idx];
+
+			/*
+			 *  Don't touch IDE controllers and I/O ports of video cards!
+			 */
+			if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+			    (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+				continue;
+
+			/*
+			 *  We shall assign a new address to this resource, either because
+			 *  the BIOS forgot to do so or because we have decided the old
+			 *  address was unusable for some reason.
+			 */
+			if (!r->start && r->end)
+				pci_assign_resource(dev, idx);
+		}
+
+		if (pci_probe & PCI_ASSIGN_ROMS) {
+			r = &dev->resource[PCI_ROM_RESOURCE];
+			r->end -= r->start;
+			r->start = 0;
+			if (r->end)
+				pci_assign_resource(dev, PCI_ROM_RESOURCE);
+		}
+	}
+	return 0;
+}
+
+void __init pcibios_resource_survey(void)
+{
+	DBG("PCI: Allocating resources\n");
+	pcibios_allocate_bus_resources(&pci_root_buses);
+	pcibios_allocate_resources(0);
+	pcibios_allocate_resources(1);
+}
+
+/**
+ * called in fs_initcall (one below subsys_initcall),
+ * give a chance for motherboard reserve resources
+ */
+fs_initcall(pcibios_assign_resources);
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for(idx=0; idx<6; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
+			continue;
+
+		r = &dev->resource[idx];
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (dev->resource[PCI_ROM_RESOURCE].start)
+		cmd |= PCI_COMMAND_MEMORY;
+	if (cmd != old_cmd) {
+		printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check the latency
+ *  timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	unsigned long prot;
+
+	/* I/O space cannot be accessed via normal processor loads and
+	 * stores on this platform.
+	 */
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/* Leave vm_pgoff as-is, the PCI space address is the physical
+	 * address on this platform.
+	 */
+	vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
+
+	prot = pgprot_val(vma->vm_page_prot);
+	if (boot_cpu_data.x86 > 3)
+		prot |= _PAGE_PCD | _PAGE_PWT;
+	vma->vm_page_prot = __pgprot(prot);
+
+	/* Write-combine setting is ignored, it is changed via the mtrr
+	 * interfaces on this platform.
+	 */
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			     vma->vm_end - vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
new file mode 100644
index 00000000000..1128451b5d7
--- /dev/null
+++ b/arch/i386/pci/irq.c
@@ -0,0 +1,1119 @@
+/*
+ *	Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+#include <asm/hw_irq.h>
+#include <linux/acpi.h>
+
+#include "pci.h"
+
+#define PIRQ_SIGNATURE	(('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
+
+static int broken_hp_bios_irq9;
+static int acer_tm360_irqrouting;
+
+static struct irq_routing_table *pirq_table;
+
+static int pirq_enable_irq(struct pci_dev *dev);
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FP error and IDE).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+	1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+	0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
+
+struct irq_router {
+	char *name;
+	u16 vendor, device;
+	int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+	int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+struct irq_router_handler {
+	u16 vendor;
+	int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
+};
+
+int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
+
+/*
+ *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+	u8 *addr;
+	struct irq_routing_table *rt;
+	int i;
+	u8 sum;
+
+	for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+		rt = (struct irq_routing_table *) addr;
+		if (rt->signature != PIRQ_SIGNATURE ||
+		    rt->version != PIRQ_VERSION ||
+		    rt->size % 16 ||
+		    rt->size < sizeof(struct irq_routing_table))
+			continue;
+		sum = 0;
+		for(i=0; i<rt->size; i++)
+			sum += addr[i];
+		if (!sum) {
+			DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+			return rt;
+		}
+	}
+	return NULL;
+}
+
+/*
+ *  If we have a IRQ routing table, use it to search for peer host
+ *  bridges.  It's a gross hack, but since there are no other known
+ *  ways how to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+	struct irq_routing_table *rt = pirq_table;
+	u8 busmap[256];
+	int i;
+	struct irq_info *e;
+
+	memset(busmap, 0, sizeof(busmap));
+	for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+		e = &rt->slots[i];
+#ifdef DEBUG
+		{
+			int j;
+			DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+			for(j=0; j<4; j++)
+				DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+			DBG("\n");
+		}
+#endif
+		busmap[e->bus] = 1;
+	}
+	for(i = 1; i < 256; i++) {
+		if (!busmap[i] || pci_find_bus(0, i))
+			continue;
+		if (pci_scan_bus(i, &pci_root_ops, NULL))
+			printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+	}
+	pcibios_last_bus = -1;
+}
+
+/*
+ *  Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+void eisa_set_level_irq(unsigned int irq)
+{
+	unsigned char mask = 1 << (irq & 7);
+	unsigned int port = 0x4d0 + (irq >> 3);
+	unsigned char val;
+	static u16 eisa_irq_mask;
+
+	if (irq >= 16 || (1 << irq) & eisa_irq_mask)
+		return;
+
+	eisa_irq_mask |= (1 << irq);
+	printk("PCI: setting IRQ %u as level-triggered\n", irq);
+	val = inb(port);
+	if (!(val & mask)) {
+		DBG(" -> edge");
+		outb(val | mask, port);
+	}
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+	u8 x;
+	unsigned reg = offset + (nr >> 1);
+
+	pci_read_config_byte(router, reg, &x);
+	return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+	u8 x;
+	unsigned reg = offset + (nr >> 1);
+
+	pci_read_config_byte(router, reg, &x);
+	x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+	pci_write_config_byte(router, reg, x);
+}
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+	return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+	unsigned int val = irqmap[irq];
+		
+	if (val) {
+		write_config_nybble(router, 0x48, pirq-1, val);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	u8 x;
+
+	pci_read_config_byte(router, pirq, &x);
+	return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	pci_write_config_byte(router, pirq, irq);
+	return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ * However, PIRQD is in the upper instead of lower 4 bits.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
+	return 1;
+}
+
+/*
+ * ITE 8330G pirq rules are nibble-based
+ * FIXME: pirqmap may be { 1, 0, 3, 2 },
+ * 	  2+3 are both mapped to irq 9 on my system
+ */
+static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+	return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+}
+
+static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+	write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+	return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	write_config_nybble(router, 0xb8, pirq >> 4, irq);
+	return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ * 0x5C bits 7:4 is INTB bits 3:0 is INTA 
+ * 0x5D bits 7:4 is INTD bits 3:0 is INTC
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	return read_config_nybble(router, 0x5C, (pirq-1)^1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
+	return 1;
+}
+
+/*
+ *	PIRQ routing for SiS 85C503 router used in several SiS chipsets.
+ *	We have to deal with the following issues here:
+ *	- vendors have different ideas about the meaning of link values
+ *	- some onboard devices (integrated in the chipset) have special
+ *	  links and are thus routed differently (i.e. not via PCI INTA-INTD)
+ *	- different revision of the router have a different layout for
+ *	  the routing registers, particularly for the onchip devices
+ *
+ *	For all routing registers the common thing is we have one byte
+ *	per routeable link which is defined as:
+ *		 bit 7      IRQ mapping enabled (0) or disabled (1)
+ *		 bits [6:4] reserved (sometimes used for onchip devices)
+ *		 bits [3:0] IRQ to map to
+ *		     allowed: 3-7, 9-12, 14-15
+ *		     reserved: 0, 1, 2, 8, 13
+ *
+ *	The config-space registers located at 0x41/0x42/0x43/0x44 are
+ *	always used to route the normal PCI INT A/B/C/D respectively.
+ *	Apparently there are systems implementing PCI routing table using
+ *	link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
+ *	We try our best to handle both link mappings.
+ *	
+ *	Currently (2003-05-21) it appears most SiS chipsets follow the
+ *	definition of routing registers from the SiS-5595 southbridge.
+ *	According to the SiS 5595 datasheets the revision id's of the
+ *	router (ISA-bridge) should be 0x01 or 0xb0.
+ *
+ *	Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
+ *	Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
+ *	They seem to work with the current routing code. However there is
+ *	some concern because of the two USB-OHCI HCs (original SiS 5595
+ *	had only one). YMMV.
+ *
+ *	Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
+ *
+ *	0x61:	IDEIRQ:
+ *		bits [6:5] must be written 01
+ *		bit 4 channel-select primary (0), secondary (1)
+ *
+ *	0x62:	USBIRQ:
+ *		bit 6 OHCI function disabled (0), enabled (1)
+ *	
+ *	0x6a:	ACPI/SCI IRQ: bits 4-6 reserved
+ *
+ *	0x7e:	Data Acq. Module IRQ - bits 4-6 reserved
+ *
+ *	We support USBIRQ (in addition to INTA-INTD) and keep the
+ *	IDE, ACPI and DAQ routing untouched as set by the BIOS.
+ *
+ *	Currently the only reported exception is the new SiS 65x chipset
+ *	which includes the SiS 69x southbridge. Here we have the 85C503
+ *	router revision 0x04 and there are changes in the register layout
+ *	mostly related to the different USB HCs with USB 2.0 support.
+ *
+ *	Onchip routing for router rev-id 0x04 (try-and-error observation)
+ *
+ *	0x60/0x61/0x62/0x63:	1xEHCI and 3xOHCI (companion) USB-HCs
+ *				bit 6-4 are probably unused, not like 5595
+ */
+
+#define PIRQ_SIS_IRQ_MASK	0x0f
+#define PIRQ_SIS_IRQ_DISABLE	0x80
+#define PIRQ_SIS_USB_ENABLE	0x40
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	u8 x;
+	int reg;
+
+	reg = pirq;
+	if (reg >= 0x01 && reg <= 0x04)
+		reg += 0x40;
+	pci_read_config_byte(router, reg, &x);
+	return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	u8 x;
+	int reg;
+
+	reg = pirq;
+	if (reg >= 0x01 && reg <= 0x04)
+		reg += 0x40;
+	pci_read_config_byte(router, reg, &x);
+	x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
+	x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE;
+	pci_write_config_byte(router, reg, x);
+	return 1;
+}
+
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ *       config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ *       Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ *       devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ *       for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	if (pirq > 8) {
+		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+		return 0;
+	}
+	return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	if (pirq > 8) {
+		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+		return 0;
+	}
+	write_config_nybble(router, 0x74, pirq-1, irq);
+	return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01).  The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a.  The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register.  There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	outb_p(pirq, 0xc00);
+	return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	outb_p(pirq, 0xc00);
+	outb_p(irq, 0xc01);
+	return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based
+ * offset 0x56 0-3 PIRQA  4-7  PIRQB
+ * offset 0x57 0-3 PIRQC  4-7  PIRQD
+ */
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+	u8 irq;
+	irq = 0;
+	if (pirq <= 4)
+	{
+		irq = read_config_nybble(router, 0x56, pirq - 1);
+	}
+	printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+		dev->vendor, dev->device, pirq, irq);
+	return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", 
+		dev->vendor, dev->device, pirq, irq);
+	if (pirq <= 4)
+	{
+		write_config_nybble(router, 0x56, pirq - 1, irq);
+	}
+	return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+	struct pci_dev *bridge;
+	int pin = pci_get_interrupt_pin(dev, &bridge);
+	return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+#endif
+
+static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	static struct pci_device_id pirq_440gx[] = {
+		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) },
+		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) },
+		{ },
+	};
+
+	/* 440GX has a proprietary PIRQ router -- don't use it */
+	if (pci_dev_present(pirq_440gx))
+		return 0;
+
+	switch(device)
+	{
+		case PCI_DEVICE_ID_INTEL_82371FB_0:
+		case PCI_DEVICE_ID_INTEL_82371SB_0:
+		case PCI_DEVICE_ID_INTEL_82371AB_0:
+		case PCI_DEVICE_ID_INTEL_82371MX:
+		case PCI_DEVICE_ID_INTEL_82443MX_0:
+		case PCI_DEVICE_ID_INTEL_82801AA_0:
+		case PCI_DEVICE_ID_INTEL_82801AB_0:
+		case PCI_DEVICE_ID_INTEL_82801BA_0:
+		case PCI_DEVICE_ID_INTEL_82801BA_10:
+		case PCI_DEVICE_ID_INTEL_82801CA_0:
+		case PCI_DEVICE_ID_INTEL_82801CA_12:
+		case PCI_DEVICE_ID_INTEL_82801DB_0:
+		case PCI_DEVICE_ID_INTEL_82801E_0:
+		case PCI_DEVICE_ID_INTEL_82801EB_0:
+		case PCI_DEVICE_ID_INTEL_ESB_1:
+		case PCI_DEVICE_ID_INTEL_ICH6_0:
+		case PCI_DEVICE_ID_INTEL_ICH6_1:
+		case PCI_DEVICE_ID_INTEL_ICH7_0:
+		case PCI_DEVICE_ID_INTEL_ICH7_1:
+			r->name = "PIIX/ICH";
+			r->get = pirq_piix_get;
+			r->set = pirq_piix_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	/* FIXME: We should move some of the quirk fixup stuff here */
+	switch(device)
+	{
+		case PCI_DEVICE_ID_VIA_82C586_0:
+		case PCI_DEVICE_ID_VIA_82C596:
+		case PCI_DEVICE_ID_VIA_82C686:
+		case PCI_DEVICE_ID_VIA_8231:
+		/* FIXME: add new ones for 8233/5 */
+			r->name = "VIA";
+			r->get = pirq_via_get;
+			r->set = pirq_via_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_VLSI_82C534:
+			r->name = "VLSI 82C534";
+			r->get = pirq_vlsi_get;
+			r->set = pirq_vlsi_set;
+			return 1;
+	}
+	return 0;
+}
+
+
+static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_SERVERWORKS_OSB4:
+		case PCI_DEVICE_ID_SERVERWORKS_CSB5:
+			r->name = "ServerWorks";
+			r->get = pirq_serverworks_get;
+			r->set = pirq_serverworks_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	if (device != PCI_DEVICE_ID_SI_503)
+		return 0;
+		
+	r->name = "SIS";
+	r->get = pirq_sis_get;
+	r->set = pirq_sis_set;
+	return 1;
+}
+
+static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_CYRIX_5520:
+			r->name = "NatSemi";
+			r->get = pirq_cyrix_get;
+			r->set = pirq_cyrix_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_OPTI_82C700:
+			r->name = "OPTI";
+			r->get = pirq_opti_get;
+			r->set = pirq_opti_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_ITE_IT8330G_0:
+			r->name = "ITE";
+			r->get = pirq_ite_get;
+			r->set = pirq_ite_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+	case PCI_DEVICE_ID_AL_M1533:
+	case PCI_DEVICE_ID_AL_M1563:
+		printk("PCI: Using ALI IRQ Router\n");
+			r->name = "ALI";
+			r->get = pirq_ali_get;
+			r->set = pirq_ali_set;
+			return 1;
+	}
+	return 0;
+}
+
+static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+	switch(device)
+	{
+		case PCI_DEVICE_ID_AMD_VIPER_740B:
+			r->name = "AMD756";
+			break;
+		case PCI_DEVICE_ID_AMD_VIPER_7413:
+			r->name = "AMD766";
+			break;
+		case PCI_DEVICE_ID_AMD_VIPER_7443:
+			r->name = "AMD768";
+			break;
+		default:
+			return 0;
+	}
+	r->get = pirq_amd756_get;
+	r->set = pirq_amd756_set;
+	return 1;
+}
+		
+static __initdata struct irq_router_handler pirq_routers[] = {
+	{ PCI_VENDOR_ID_INTEL, intel_router_probe },
+	{ PCI_VENDOR_ID_AL, ali_router_probe },
+	{ PCI_VENDOR_ID_ITE, ite_router_probe },
+	{ PCI_VENDOR_ID_VIA, via_router_probe },
+	{ PCI_VENDOR_ID_OPTI, opti_router_probe },
+	{ PCI_VENDOR_ID_SI, sis_router_probe },
+	{ PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
+	{ PCI_VENDOR_ID_VLSI, vlsi_router_probe },
+	{ PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
+	{ PCI_VENDOR_ID_AMD, amd_router_probe },
+	/* Someone with docs needs to add the ATI Radeon IGP */
+	{ 0, NULL }
+};
+static struct irq_router pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+
+/*
+ *	FIXME: should we have an option to say "generic for
+ *	chipset" ?
+ */
+ 
+static void __init pirq_find_router(struct irq_router *r)
+{
+	struct irq_routing_table *rt = pirq_table;
+	struct irq_router_handler *h;
+
+#ifdef CONFIG_PCI_BIOS
+	if (!rt->signature) {
+		printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+		r->set = pirq_bios_set;
+		r->name = "BIOS";
+		return;
+	}
+#endif
+
+	/* Default unless a driver reloads it */
+	r->name = "default";
+	r->get = NULL;
+	r->set = NULL;
+	
+	DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+	    rt->rtr_vendor, rt->rtr_device);
+
+	pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+	if (!pirq_router_dev) {
+		DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+		return;
+	}
+
+	for( h = pirq_routers; h->vendor; h++) {
+		/* First look for a router match */
+		if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+			break;
+		/* Fall back to a device match */
+		if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+			break;
+	}
+	printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+		pirq_router.name,
+		pirq_router_dev->vendor,
+		pirq_router_dev->device,
+		pci_name(pirq_router_dev));
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+	struct irq_routing_table *rt = pirq_table;
+	int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+	struct irq_info *info;
+
+	for (info = rt->slots; entries--; info++)
+		if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+			return info;
+	return NULL;
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+	u8 pin;
+	struct irq_info *info;
+	int i, pirq, newirq;
+	int irq = 0;
+	u32 mask;
+	struct irq_router *r = &pirq_router;
+	struct pci_dev *dev2 = NULL;
+	char *msg = NULL;
+
+	/* Find IRQ pin */
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	if (!pin) {
+		DBG(" -> no interrupt pin\n");
+		return 0;
+	}
+	pin = pin - 1;
+
+	/* Find IRQ routing entry */
+
+	if (!pirq_table)
+		return 0;
+	
+	DBG("IRQ for %s[%c]", pci_name(dev), 'A' + pin);
+	info = pirq_get_info(dev);
+	if (!info) {
+		DBG(" -> not found in routing table\n");
+		return 0;
+	}
+	pirq = info->irq[pin].link;
+	mask = info->irq[pin].bitmap;
+	if (!pirq) {
+		DBG(" -> not routed\n");
+		return 0;
+	}
+	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+	mask &= pcibios_irq_mask;
+
+	/* Work around broken HP Pavilion Notebooks which assign USB to
+	   IRQ 9 even though it is actually wired to IRQ 11 */
+
+	if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
+		dev->irq = 11;
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
+		r->set(pirq_router_dev, dev, pirq, 11);
+	}
+
+	/* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
+	if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+		pirq = 0x68;
+		mask = 0x400;
+		dev->irq = r->get(pirq_router_dev, dev, pirq);
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+	}
+
+	/*
+	 * Find the best IRQ to assign: use the one
+	 * reported by the device if possible.
+	 */
+	newirq = dev->irq;
+	if (!((1 << newirq) & mask)) {
+		if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0;
+		else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, pci_name(dev));
+	}
+	if (!newirq && assign) {
+		for (i = 0; i < 16; i++) {
+			if (!(mask & (1 << i)))
+				continue;
+			if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, SA_SHIRQ))
+				newirq = i;
+		}
+	}
+	DBG(" -> newirq=%d", newirq);
+
+	/* Check if it is hardcoded */
+	if ((pirq & 0xf0) == 0xf0) {
+		irq = pirq & 0xf;
+		DBG(" -> hardcoded IRQ %d\n", irq);
+		msg = "Hardcoded";
+	} else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
+	((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
+		DBG(" -> got IRQ %d\n", irq);
+		msg = "Found";
+	} else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+		DBG(" -> assigning IRQ %d", newirq);
+		if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+			eisa_set_level_irq(newirq);
+			DBG(" ... OK\n");
+			msg = "Assigned";
+			irq = newirq;
+		}
+	}
+
+	if (!irq) {
+		DBG(" ... failed\n");
+		if (newirq && mask == (1 << newirq)) {
+			msg = "Guessed";
+			irq = newirq;
+		} else
+			return 0;
+	}
+	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+
+	/* Update IRQ for all devices with the same pirq value */
+	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
+		pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+		if (!pin)
+			continue;
+		pin--;
+		info = pirq_get_info(dev2);
+		if (!info)
+			continue;
+		if (info->irq[pin].link == pirq) {
+			/* We refuse to override the dev->irq information. Give a warning! */
+		    	if ( dev2->irq && dev2->irq != irq && \
+			(!(pci_probe & PCI_USE_PIRQ_MASK) || \
+			((1 << dev2->irq) & mask)) ) {
+#ifndef CONFIG_PCI_MSI
+		    		printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+				       pci_name(dev2), dev2->irq, irq);
+#endif
+		    		continue;
+		    	}
+			dev2->irq = irq;
+			pirq_penalty[irq]++;
+			if (dev != dev2)
+				printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+		}
+	}
+	return 1;
+}
+
+static void __init pcibios_fixup_irqs(void)
+{
+	struct pci_dev *dev = NULL;
+	u8 pin;
+
+	DBG("PCI: IRQ fixup\n");
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		/*
+		 * If the BIOS has set an out of range IRQ number, just ignore it.
+		 * Also keep track of which IRQ's are already in use.
+		 */
+		if (dev->irq >= 16) {
+			DBG("%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+			dev->irq = 0;
+		}
+		/* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+		if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+			pirq_penalty[dev->irq] = 0;
+		pirq_penalty[dev->irq]++;
+	}
+
+	dev = NULL;
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * Recalculate IRQ numbers if we use the I/O APIC.
+		 */
+		if (io_apic_assign_pci_irqs)
+		{
+			int irq;
+
+			if (pin) {
+				pin--;		/* interrupt pins are numbered starting from 1 */
+				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+	/*
+	 * Busses behind bridges are typically not listed in the MP-table.
+	 * In this case we have to look up the IRQ based on the parent bus,
+	 * parent slot, and pin number. The SMP code detects such bridged
+	 * busses itself so we should get into this branch reliably.
+	 */
+				if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+					struct pci_dev * bridge = dev->bus->self;
+
+					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+					irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
+							PCI_SLOT(bridge->devfn), pin);
+					if (irq >= 0)
+						printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
+							pci_name(bridge), 'A' + pin, irq);
+				}
+				if (irq >= 0) {
+					if (use_pci_vector() &&
+						!platform_legacy_irq(irq))
+						irq = IO_APIC_VECTOR(irq);
+
+					printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+						pci_name(dev), 'A' + pin, irq);
+					dev->irq = irq;
+				}
+			}
+		}
+#endif
+		/*
+		 * Still no IRQ? Try to lookup one...
+		 */
+		if (pin && !dev->irq)
+			pcibios_lookup_irq(dev, 0);
+	}
+}
+
+/*
+ * Work around broken HP Pavilion Notebooks which assign USB to
+ * IRQ 9 even though it is actually wired to IRQ 11
+ */
+static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d)
+{
+	if (!broken_hp_bios_irq9) {
+		broken_hp_bios_irq9 = 1;
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+	}
+	return 0;
+}
+
+/*
+ * Work around broken Acer TravelMate 360 Notebooks which assign
+ * Cardbus to IRQ 11 even though it is actually wired to IRQ 10
+ */
+static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d)
+{
+	if (!acer_tm360_irqrouting) {
+		acer_tm360_irqrouting = 1;
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+	}
+	return 0;
+}
+
+static struct dmi_system_id __initdata pciirq_dmi_table[] = {
+	{
+		.callback = fix_broken_hp_bios_irq9,
+		.ident = "HP Pavilion N5400 Series Laptop",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+			DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
+		},
+	},
+	{
+		.callback = fix_acer_tm360_irqrouting,
+		.ident = "Acer TravelMate 36x Laptop",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+		},
+	},
+	{ }
+};
+
+static int __init pcibios_irq_init(void)
+{
+	DBG("PCI: IRQ init\n");
+
+	if (pcibios_enable_irq || raw_pci_ops == NULL)
+		return 0;
+
+	dmi_check_system(pciirq_dmi_table);
+
+	pirq_table = pirq_find_routing_table();
+
+#ifdef CONFIG_PCI_BIOS
+	if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+		pirq_table = pcibios_get_irq_routing_table();
+#endif
+	if (pirq_table) {
+		pirq_peer_trick();
+		pirq_find_router(&pirq_router);
+		if (pirq_table->exclusive_irqs) {
+			int i;
+			for (i=0; i<16; i++)
+				if (!(pirq_table->exclusive_irqs & (1 << i)))
+					pirq_penalty[i] += 100;
+		}
+		/* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+		if (io_apic_assign_pci_irqs)
+			pirq_table = NULL;
+	}
+
+	pcibios_enable_irq = pirq_enable_irq;
+
+	pcibios_fixup_irqs();
+	return 0;
+}
+
+subsys_initcall(pcibios_irq_init);
+
+
+static void pirq_penalize_isa_irq(int irq)
+{
+	/*
+	 *  If any ISAPnP device reports an IRQ in its list of possible
+	 *  IRQ's, we try to avoid assigning it to PCI devices.
+	 */
+	if (irq < 16)
+		pirq_penalty[irq] += 100;
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+#ifdef CONFIG_ACPI_PCI
+	if (!acpi_noirq)
+		acpi_penalize_isa_irq(irq);
+	else
+#endif
+		pirq_penalize_isa_irq(irq);
+}
+
+static int pirq_enable_irq(struct pci_dev *dev)
+{
+	u8 pin;
+	extern int via_interrupt_line_quirk;
+	struct pci_dev *temp_dev;
+
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+		char *msg = "";
+
+		pin--;		/* interrupt pins are numbered starting from 1 */
+
+		if (io_apic_assign_pci_irqs) {
+			int irq;
+
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+			/*
+			 * Busses behind bridges are typically not listed in the MP-table.
+			 * In this case we have to look up the IRQ based on the parent bus,
+			 * parent slot, and pin number. The SMP code detects such bridged
+			 * busses itself so we should get into this branch reliably.
+			 */
+			temp_dev = dev;
+			while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+				struct pci_dev * bridge = dev->bus->self;
+
+				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
+						PCI_SLOT(bridge->devfn), pin);
+				if (irq >= 0)
+					printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
+						pci_name(bridge), 'A' + pin, irq);
+				dev = bridge;
+			}
+			dev = temp_dev;
+			if (irq >= 0) {
+#ifdef CONFIG_PCI_MSI
+				if (!platform_legacy_irq(irq))
+					irq = IO_APIC_VECTOR(irq);
+#endif
+				printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+					pci_name(dev), 'A' + pin, irq);
+				dev->irq = irq;
+				return 0;
+			} else
+				msg = " Probably buggy MP table.";
+		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+			msg = "";
+		else
+			msg = " Please try using pci=biosirq.";
+
+		/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
+		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+			return 0;
+
+		printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+		       'A' + pin, pci_name(dev), msg);
+	}
+	/* VIA bridges use interrupt line for apic/pci steering across
+	   the V-Link */
+	else if (via_interrupt_line_quirk)
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
+	return 0;
+}
+
+int pci_vector_resources(int last, int nr_released)
+{
+	int count = nr_released;
+
+	int next = last;
+	int offset = (last % 8);
+
+	while (next < FIRST_SYSTEM_VECTOR) {
+		next += 8;
+#ifdef CONFIG_X86_64
+		if (next == IA32_SYSCALL_VECTOR)
+			continue;
+#else
+		if (next == SYSCALL_VECTOR)
+			continue;
+#endif
+		count++;
+		if (next >= FIRST_SYSTEM_VECTOR) {
+			if (offset%8) {
+				next = FIRST_DEVICE_VECTOR + offset;
+				offset++;
+				continue;
+			}
+			count--;
+		}
+	}
+
+	return count;
+}
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
new file mode 100644
index 00000000000..1492e375386
--- /dev/null
+++ b/arch/i386/pci/legacy.c
@@ -0,0 +1,54 @@
+/*
+ * legacy.c - traditional, old school PCI bus probing
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include "pci.h"
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+	int n, devfn;
+
+	if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+		return;
+	DBG("PCI: Peer bridge fixup\n");
+
+	for (n=0; n <= pcibios_last_bus; n++) {
+		u32 l;
+		if (pci_find_bus(0, n))
+			continue;
+		for (devfn = 0; devfn < 256; devfn += 8) {
+			if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
+			    l != 0x0000 && l != 0xffff) {
+				DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
+				printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
+				pci_scan_bus(n, &pci_root_ops, NULL);
+				break;
+			}
+		}
+	}
+}
+
+static int __init pci_legacy_init(void)
+{
+	if (!raw_pci_ops) {
+		printk("PCI: System does not support PCI\n");
+		return 0;
+	}
+
+	if (pcibios_scanned++)
+		return 0;
+
+	printk("PCI: Probing PCI hardware\n");
+	pci_root_bus = pcibios_scan_root(0);
+
+	pcibios_fixup_peer_bridges();
+
+	return 0;
+}
+
+subsys_initcall(pci_legacy_init);
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
new file mode 100644
index 00000000000..021a50aa51f
--- /dev/null
+++ b/arch/i386/pci/mmconfig.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2004 Matthew Wilcox <matthew@wil.cx>
+ * Copyright (C) 2004 Intel Corp.
+ *
+ * This code is released under the GNU General Public License version 2.
+ */
+
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	if (dev_base != mmcfg_last_accessed_device) {
+		mmcfg_last_accessed_device = dev_base;
+		set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+			  unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		*value = readb(mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		*value = readw(mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		*value = readl(mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+			   unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		writeb(value, mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		writew(value, mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		writel(value, mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+	.read =		pci_mmcfg_read,
+	.write =	pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+		goto out;
+	if (!pci_mmcfg_base_addr)
+		goto out;
+
+	/* Kludge for now. Don't use mmconfig on AMD systems because
+	   those have some busses where mmconfig doesn't work,
+	   and we don't parse ACPI MCFG well enough to handle that. 
+	   Remove when proper handling is added. */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		goto out; 
+
+	printk(KERN_INFO "PCI: Using MMCONFIG\n");
+	raw_pci_ops = &pci_mmcfg;
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
new file mode 100644
index 00000000000..9e369546189
--- /dev/null
+++ b/arch/i386/pci/numa.c
@@ -0,0 +1,130 @@
+/*
+ * numa.c - Low-level PCI access for NUMA-Q machines
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nodemask.h>
+#include "pci.h"
+
+#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+
+#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
+
+static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
+			     unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+
+	switch (len) {
+	case 1:
+		*value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+		break;
+	case 2:
+		*value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+		break;
+	case 4:
+		*value = inl_quad(0xCFC, BUS2QUAD(bus));
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
+			      unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+
+	switch (len) {
+	case 1:
+		outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+		break;
+	case 2:
+		outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+		break;
+	case 4:
+		outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+#undef PCI_CONF1_MQ_ADDRESS
+
+static struct pci_raw_ops pci_direct_conf1_mq = {
+	.read	= pci_conf1_mq_read,
+	.write	= pci_conf1_mq_write
+};
+
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+	/*
+	 * i450NX -- Find and scan all secondary buses on all PXB's.
+	 */
+	int pxb, reg;
+	u8 busno, suba, subb;
+	int quad = BUS2QUAD(d->bus->number);
+
+	printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
+	reg = 0xd0;
+	for(pxb=0; pxb<2; pxb++) {
+		pci_read_config_byte(d, reg++, &busno);
+		pci_read_config_byte(d, reg++, &suba);
+		pci_read_config_byte(d, reg++, &subb);
+		DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+		if (busno)
+			pci_scan_bus(QUADLOCAL2BUS(quad,busno), &pci_root_ops, NULL);	/* Bus A */
+		if (suba < subb)
+			pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), &pci_root_ops, NULL);	/* Bus B */
+	}
+	pcibios_last_bus = -1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
+
+static int __init pci_numa_init(void)
+{
+	int quad;
+
+	raw_pci_ops = &pci_direct_conf1_mq;
+
+	if (pcibios_scanned++)
+		return 0;
+
+	pci_root_bus = pcibios_scan_root(0);
+	if (num_online_nodes() > 1)
+		for_each_online_node(quad) {
+			if (quad == 0)
+				continue;
+			printk("Scanning PCI bus %d for quad %d\n", 
+				QUADLOCAL2BUS(quad,0), quad);
+			pci_scan_bus(QUADLOCAL2BUS(quad,0), 
+				&pci_root_ops, NULL);
+		}
+	return 0;
+}
+
+subsys_initcall(pci_numa_init);
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
new file mode 100644
index 00000000000..141421b673b
--- /dev/null
+++ b/arch/i386/pci/pcbios.c
@@ -0,0 +1,487 @@
+/*
+ * BIOS32 and PCI BIOS handling.
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+#include "pci-functions.h"
+
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE		(('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE		(('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
+
+/* PCI BIOS hardware mechanism flags */
+#define PCIBIOS_HW_TYPE1		0x01
+#define PCIBIOS_HW_TYPE2		0x02
+#define PCIBIOS_HW_TYPE1_SPEC		0x10
+#define PCIBIOS_HW_TYPE2_SPEC		0x20
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * 	Standard BIOS 32-bit Service Directory Proposal
+ * 	Revision 0.4 May 24, 1993
+ * 	Phoenix Technologies Ltd.
+ *	Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+	struct {
+		unsigned long signature;	/* _32_ */
+		unsigned long entry;		/* 32 bit physical address */
+		unsigned char revision;		/* Revision level, 0 */
+		unsigned char length;		/* Length in paragraphs should be 01 */
+		unsigned char checksum;		/* All bytes must add up to zero */
+		unsigned char reserved[5]; 	/* Must be zero */
+	} fields;
+	char chars[16];
+};
+
+/*
+ * Physical address of the service directory.  I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static struct {
+	unsigned long address;
+	unsigned short segment;
+} bios32_indirect = { 0, __KERNEL_CS };
+
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+	unsigned char return_code;	/* %al */
+	unsigned long address;		/* %ebx */
+	unsigned long length;		/* %ecx */
+	unsigned long entry;		/* %edx */
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__asm__("lcall *(%%edi); cld"
+		: "=a" (return_code),
+		  "=b" (address),
+		  "=c" (length),
+		  "=d" (entry)
+		: "0" (service),
+		  "1" (0),
+		  "D" (&bios32_indirect));
+	local_irq_restore(flags);
+
+	switch (return_code) {
+		case 0:
+			return address + entry;
+		case 0x80:	/* Not present */
+			printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
+			return 0;
+		default: /* Shouldn't happen */
+			printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+				service, return_code);
+			return 0;
+	}
+}
+
+static struct {
+	unsigned long address;
+	unsigned short segment;
+} pci_indirect = { 0, __KERNEL_CS };
+
+static int pci_bios_present;
+
+static int __devinit check_pcibios(void)
+{
+	u32 signature, eax, ebx, ecx;
+	u8 status, major_ver, minor_ver, hw_mech;
+	unsigned long flags, pcibios_entry;
+
+	if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+		pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+
+		local_irq_save(flags);
+		__asm__(
+			"lcall *(%%edi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=d" (signature),
+			  "=a" (eax),
+			  "=b" (ebx),
+			  "=c" (ecx)
+			: "1" (PCIBIOS_PCI_BIOS_PRESENT),
+			  "D" (&pci_indirect)
+			: "memory");
+		local_irq_restore(flags);
+
+		status = (eax >> 8) & 0xff;
+		hw_mech = eax & 0xff;
+		major_ver = (ebx >> 8) & 0xff;
+		minor_ver = ebx & 0xff;
+		if (pcibios_last_bus < 0)
+			pcibios_last_bus = ecx & 0xff;
+		DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
+			status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
+		if (status || signature != PCI_SIGNATURE) {
+			printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
+				status, signature);
+			return 0;
+		}
+		printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
+			major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
+#ifdef CONFIG_PCI_DIRECT
+		if (!(hw_mech & PCIBIOS_HW_TYPE1))
+			pci_probe &= ~PCI_PROBE_CONF1;
+		if (!(hw_mech & PCIBIOS_HW_TYPE2))
+			pci_probe &= ~PCI_PROBE_CONF2;
+#endif
+		return 1;
+	}
+	return 0;
+}
+
+static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+					unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+	unsigned short bx;
+	unsigned short ret;
+
+	__asm__("lcall *(%%edi); cld\n\t"
+		"jc 1f\n\t"
+		"xor %%ah, %%ah\n"
+		"1:"
+		: "=b" (bx),
+		  "=a" (ret)
+		: "1" (PCIBIOS_FIND_PCI_DEVICE),
+		  "c" (device_id),
+		  "d" (vendor),
+		  "S" ((int) index),
+		  "D" (&pci_indirect));
+	*bus = (bx >> 8) & 0xff;
+	*device_fn = bx & 0xff;
+	return (int) (ret & 0xff00) >> 8;
+}
+
+static int pci_bios_read(unsigned int seg, unsigned int bus,
+			 unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long result = 0;
+	unsigned long flags;
+	unsigned long bx = (bus << 8) | devfn;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	switch (len) {
+	case 1:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=c" (*value),
+			  "=a" (result)
+			: "1" (PCIBIOS_READ_CONFIG_BYTE),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	case 2:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=c" (*value),
+			  "=a" (result)
+			: "1" (PCIBIOS_READ_CONFIG_WORD),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	case 4:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=c" (*value),
+			  "=a" (result)
+			: "1" (PCIBIOS_READ_CONFIG_DWORD),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_write(unsigned int seg, unsigned int bus,
+			  unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long result = 0;
+	unsigned long flags;
+	unsigned long bx = (bus << 8) | devfn;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 255)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	switch (len) {
+	case 1:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=a" (result)
+			: "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+			  "c" (value),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	case 2:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=a" (result)
+			: "0" (PCIBIOS_WRITE_CONFIG_WORD),
+			  "c" (value),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	case 4:
+		__asm__("lcall *(%%esi); cld\n\t"
+			"jc 1f\n\t"
+			"xor %%ah, %%ah\n"
+			"1:"
+			: "=a" (result)
+			: "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+			  "c" (value),
+			  "b" (bx),
+			  "D" ((long)reg),
+			  "S" (&pci_indirect));
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return (int)((result & 0xff00) >> 8);
+}
+
+
+/*
+ * Function table for BIOS32 access
+ */
+
+static struct pci_raw_ops pci_bios_access = {
+	.read =		pci_bios_read,
+	.write =	pci_bios_write
+};
+
+/*
+ * Try to find PCI BIOS.
+ */
+
+static struct pci_raw_ops * __devinit pci_find_bios(void)
+{
+	union bios32 *check;
+	unsigned char sum;
+	int i, length;
+
+	/*
+	 * Follow the standard procedure for locating the BIOS32 Service
+	 * directory by scanning the permissible address range from
+	 * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+	 */
+
+	for (check = (union bios32 *) __va(0xe0000);
+	     check <= (union bios32 *) __va(0xffff0);
+	     ++check) {
+		if (check->fields.signature != BIOS32_SIGNATURE)
+			continue;
+		length = check->fields.length * 16;
+		if (!length)
+			continue;
+		sum = 0;
+		for (i = 0; i < length ; ++i)
+			sum += check->chars[i];
+		if (sum != 0)
+			continue;
+		if (check->fields.revision != 0) {
+			printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
+				check->fields.revision, check);
+			continue;
+		}
+		DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+		if (check->fields.entry >= 0x100000) {
+			printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+			return NULL;
+		} else {
+			unsigned long bios32_entry = check->fields.entry;
+			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+			bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+			if (check_pcibios())
+				return &pci_bios_access;
+		}
+		break;	/* Hopefully more than one BIOS32 cannot happen... */
+	}
+
+	return NULL;
+}
+
+/*
+ * Sort the device list according to PCI BIOS. Nasty hack, but since some
+ * fool forgot to define the `correct' device order in the PCI BIOS specs
+ * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
+ * which used BIOS ordering, we are bound to do this...
+ */
+
+void __devinit pcibios_sort(void)
+{
+	LIST_HEAD(sorted_devices);
+	struct list_head *ln;
+	struct pci_dev *dev, *d;
+	int idx, found;
+	unsigned char bus, devfn;
+
+	DBG("PCI: Sorting device list...\n");
+	while (!list_empty(&pci_devices)) {
+		ln = pci_devices.next;
+		dev = pci_dev_g(ln);
+		idx = found = 0;
+		while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+			idx++;
+			list_for_each(ln, &pci_devices) {
+				d = pci_dev_g(ln);
+				if (d->bus->number == bus && d->devfn == devfn) {
+					list_del(&d->global_list);
+					list_add_tail(&d->global_list, &sorted_devices);
+					if (d == dev)
+						found = 1;
+					break;
+				}
+			}
+			if (ln == &pci_devices) {
+				printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+				/*
+				 * We must not continue scanning as several buggy BIOSes
+				 * return garbage after the last device. Grr.
+				 */
+				break;
+			}
+		}
+		if (!found) {
+			printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
+				pci_name(dev));
+			list_del(&dev->global_list);
+			list_add_tail(&dev->global_list, &sorted_devices);
+		}
+	}
+	list_splice(&sorted_devices, &pci_devices);
+}
+
+/*
+ *  BIOS Functions for IRQ Routing
+ */
+
+struct irq_routing_options {
+	u16 size;
+	struct irq_info *table;
+	u16 segment;
+} __attribute__((packed));
+
+struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+{
+	struct irq_routing_options opt;
+	struct irq_routing_table *rt = NULL;
+	int ret, map;
+	unsigned long page;
+
+	if (!pci_bios_present)
+		return NULL;
+	page = __get_free_page(GFP_KERNEL);
+	if (!page)
+		return NULL;
+	opt.table = (struct irq_info *) page;
+	opt.size = PAGE_SIZE;
+	opt.segment = __KERNEL_DS;
+
+	DBG("PCI: Fetching IRQ routing table... ");
+	__asm__("push %%es\n\t"
+		"push %%ds\n\t"
+		"pop  %%es\n\t"
+		"lcall *(%%esi); cld\n\t"
+		"pop %%es\n\t"
+		"jc 1f\n\t"
+		"xor %%ah, %%ah\n"
+		"1:"
+		: "=a" (ret),
+		  "=b" (map),
+		  "=m" (opt)
+		: "0" (PCIBIOS_GET_ROUTING_OPTIONS),
+		  "1" (0),
+		  "D" ((long) &opt),
+		  "S" (&pci_indirect),
+		  "m" (opt)
+		: "memory");
+	DBG("OK  ret=%d, size=%d, map=%x\n", ret, opt.size, map);
+	if (ret & 0xff00)
+		printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
+	else if (opt.size) {
+		rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
+		if (rt) {
+			memset(rt, 0, sizeof(struct irq_routing_table));
+			rt->size = opt.size + sizeof(struct irq_routing_table);
+			rt->exclusive_irqs = map;
+			memcpy(rt->slots, (void *) page, opt.size);
+			printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
+		}
+	}
+	free_page(page);
+	return rt;
+}
+
+
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+{
+	int ret;
+
+	__asm__("lcall *(%%esi); cld\n\t"
+		"jc 1f\n\t"
+		"xor %%ah, %%ah\n"
+		"1:"
+		: "=a" (ret)
+		: "0" (PCIBIOS_SET_PCI_HW_INT),
+		  "b" ((dev->bus->number << 8) | dev->devfn),
+		  "c" ((irq << 8) | (pin + 10)),
+		  "S" (&pci_indirect));
+	return !(ret & 0xff00);
+}
+
+static int __init pci_pcbios_init(void)
+{
+	if ((pci_probe & PCI_PROBE_BIOS) 
+		&& ((raw_pci_ops = pci_find_bios()))) {
+		pci_probe |= PCI_BIOS_SORT;
+		pci_bios_present = 1;
+	}
+	return 0;
+}
+
+arch_initcall(pci_pcbios_init);
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
new file mode 100644
index 00000000000..a8fc80ca69f
--- /dev/null
+++ b/arch/i386/pci/pci.h
@@ -0,0 +1,74 @@
+/*
+ *	Low-Level PCI Access for i386 machines.
+ *
+ *	(c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS		0x0001
+#define PCI_PROBE_CONF1		0x0002
+#define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+
+#define PCI_NO_SORT		0x0100
+#define PCI_BIOS_SORT		0x0200
+#define PCI_NO_CHECKS		0x0400
+#define PCI_USE_PIRQ_MASK	0x0800
+#define PCI_ASSIGN_ROMS		0x1000
+#define PCI_BIOS_IRQ_SCAN	0x2000
+#define PCI_ASSIGN_ALL_BUSSES	0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+int pcibios_enable_resources(struct pci_dev *, int);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+	u8 bus, devfn;			/* Bus, device and function */
+	struct {
+		u8 link;		/* IRQ line ID, chipset dependent, 0=not routed */
+		u16 bitmap;		/* Available IRQs */
+	} __attribute__((packed)) irq[4];
+	u8 slot;			/* Slot number, 0=onboard */
+	u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+	u32 signature;			/* PIRQ_SIGNATURE should be here */
+	u16 version;			/* PIRQ_VERSION */
+	u16 size;			/* Table size in bytes */
+	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
+	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
+	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
+	u32 miniport_data;		/* Crap */
+	u8 rfu[11];
+	u8 checksum;			/* Modulo 256 checksum must give zero */
+	struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+extern int pcibios_scanned;
+extern spinlock_t pci_config_lock;
+
+extern int (*pcibios_enable_irq)(struct pci_dev *dev);
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
new file mode 100644
index 00000000000..6a924878443
--- /dev/null
+++ b/arch/i386/pci/visws.c
@@ -0,0 +1,110 @@
+/*
+ *	Low-Level PCI Support for SGI Visual Workstation
+ *
+ *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include "cobalt.h"
+#include "lithium.h"
+
+#include "pci.h"
+
+
+extern struct pci_raw_ops pci_direct_conf1;
+
+static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
+
+int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;
+
+void __init pcibios_penalize_isa_irq(int irq) {}
+
+
+unsigned int pci_bus0, pci_bus1;
+
+static inline u8 bridge_swizzle(u8 pin, u8 slot) 
+{
+	return (((pin - 1) + slot) % 4) + 1;
+}
+
+static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp)
+{
+	u8 pin = *pinp;
+
+	while (dev->bus->self) {	/* Move up the chain of bridges. */
+		pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
+		dev = dev->bus->self;
+	}
+	*pinp = pin;
+
+	return PCI_SLOT(dev->devfn);
+}
+
+static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	int irq, bus = dev->bus->number;
+
+	pin--;
+
+	/* Nothing useful at PIIX4 pin 1 */
+	if (bus == pci_bus0 && slot == 4 && pin == 0)
+		return -1;
+
+	/* PIIX4 USB is on Bus 0, Slot 4, Line 3 */
+	if (bus == pci_bus0 && slot == 4 && pin == 3) {
+		irq = CO_IRQ(CO_APIC_PIIX4_USB);
+		goto out;
+	}
+
+	/* First pin spread down 1 APIC entry per slot */
+	if (pin == 0) {
+		irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 :
+						CO_APIC_PCIA_BASE0) + slot);
+		goto out;
+	}
+
+	/* lines 1,2,3 from any slot is shared in this twirly pattern */
+	if (bus == pci_bus1) {
+		/* lines 1-3 from devices 0 1 rotate over 2 apic entries */
+		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2));
+	} else { /* bus == pci_bus0 */
+		/* lines 1-3 from devices 0-3 rotate over 3 apic entries */
+		if (slot == 0)
+			slot = 3; /* same pattern */
+		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3));
+	}
+out:
+	printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq);
+	return irq;
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+static int __init pcibios_init(void)
+{
+	/* The VISWS supports configuration access type 1 only */
+	pci_probe = (pci_probe | PCI_PROBE_CONF1) &
+		    ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2);
+
+	pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff;
+	pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff;
+
+	printk(KERN_INFO "PCI: Lithium bridge A bus: %u, "
+		"bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0);
+
+	raw_pci_ops = &pci_direct_conf1;
+	pci_scan_bus(pci_bus0, &pci_root_ops, NULL);
+	pci_scan_bus(pci_bus1, &pci_root_ops, NULL);
+	pci_fixup_irqs(visws_swizzle, visws_map_irq);
+	pcibios_resource_survey();
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile
new file mode 100644
index 00000000000..8cfa4e8a719
--- /dev/null
+++ b/arch/i386/power/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_PM)		+= cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
new file mode 100644
index 00000000000..cf337c673d9
--- /dev/null
+++ b/arch/i386/power/cpu.c
@@ -0,0 +1,152 @@
+/*
+ * Suspend support specific for i386.
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/sysrq.h>
+#include <linux/proc_fs.h>
+#include <linux/irq.h>
+#include <linux/pm.h>
+#include <linux/device.h>
+#include <linux/suspend.h>
+#include <linux/acpi.h>
+#include <asm/uaccess.h>
+#include <asm/acpi.h>
+#include <asm/tlbflush.h>
+
+static struct saved_context saved_context;
+
+unsigned long saved_context_ebx;
+unsigned long saved_context_esp, saved_context_ebp;
+unsigned long saved_context_esi, saved_context_edi;
+unsigned long saved_context_eflags;
+
+extern void enable_sep_cpu(void *);
+
+void __save_processor_state(struct saved_context *ctxt)
+{
+	kernel_fpu_begin();
+
+	/*
+	 * descriptor tables
+	 */
+	asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
+	asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
+	asm volatile ("sldt %0" : "=m" (ctxt->ldt));
+	asm volatile ("str %0"  : "=m" (ctxt->tr));
+
+	/*
+	 * segment registers
+	 */
+	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
+	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
+	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
+	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+
+	/*
+	 * control registers 
+	 */
+	asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0));
+	asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2));
+	asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3));
+	asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4));
+}
+
+void save_processor_state(void)
+{
+	__save_processor_state(&saved_context);
+}
+
+static void
+do_fpu_end(void)
+{
+        /* restore FPU regs if necessary */
+	/* Do it out of line so that gcc does not move cr0 load to some stupid place */
+        kernel_fpu_end();
+	mxcsr_feature_mask_init();
+}
+
+
+static void fix_processor_context(void)
+{
+	int cpu = smp_processor_id();
+	struct tss_struct * t = &per_cpu(init_tss, cpu);
+
+	set_tss_desc(cpu,t);	/* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+        per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff;
+
+	load_TR_desc();				/* This does ltr */
+	load_LDT(&current->active_mm->context);	/* This does lldt */
+
+	/*
+	 * Now maybe reload the debug registers
+	 */
+	if (current->thread.debugreg[7]){
+                loaddebug(&current->thread, 0);
+                loaddebug(&current->thread, 1);
+                loaddebug(&current->thread, 2);
+                loaddebug(&current->thread, 3);
+                /* no 4 and 5 */
+                loaddebug(&current->thread, 6);
+                loaddebug(&current->thread, 7);
+	}
+
+}
+
+void __restore_processor_state(struct saved_context *ctxt)
+{
+
+	/*
+	 * control registers
+	 */
+	asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4));
+	asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3));
+	asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2));
+	asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
+
+	/*
+	 * segment registers
+	 */
+	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+	asm volatile ("movw %0, %%gs" :: "r" (ctxt->gs));
+	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
+
+	/*
+	 * now restore the descriptor tables to their proper values
+	 * ltr is done i fix_processor_context().
+	 */
+	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
+	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+	asm volatile ("lldt %0" :: "m" (ctxt->ldt));
+
+	/*
+	 * sysenter MSRs
+	 */
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu(NULL);
+
+	fix_processor_context();
+	do_fpu_end();
+}
+
+void restore_processor_state(void)
+{
+	__restore_processor_state(&saved_context);
+}
+
+/* Needed by apm.c */
+EXPORT_SYMBOL(save_processor_state);
+EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
new file mode 100644
index 00000000000..c4105286ff2
--- /dev/null
+++ b/arch/i386/power/swsusp.S
@@ -0,0 +1,73 @@
+.text
+
+/* Originally gcc generated, modified by hand
+ *
+ * This may not use any stack, nor any variable that is not "NoSave":
+ *
+ * Its rewriting one kernel image with another. What is stack in "old"
+ * image could very well be data page in "new" image, and overwriting
+ * your own stack under you is bad idea.
+ */
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/asm_offsets.h>
+
+	.text
+
+ENTRY(swsusp_arch_suspend)
+
+	movl %esp, saved_context_esp
+	movl %ebx, saved_context_ebx
+	movl %ebp, saved_context_ebp
+	movl %esi, saved_context_esi
+	movl %edi, saved_context_edi
+	pushfl ; popl saved_context_eflags
+
+	call swsusp_save
+	ret
+
+ENTRY(swsusp_arch_resume)
+	movl	$swsusp_pg_dir-__PAGE_OFFSET, %ecx
+	movl	%ecx, %cr3
+
+	movl	pagedir_nosave, %edx
+	.p2align 4,,7
+
+copy_loop:
+	testl	%edx, %edx
+	jz	done
+
+	movl	pbe_address(%edx), %esi
+	movl	pbe_orig_address(%edx), %edi
+
+	movl	$1024, %ecx
+	rep
+	movsl
+
+	movl	pbe_next(%edx), %edx
+	jmp	copy_loop
+	.p2align 4,,7
+
+done:
+	/* Flush TLB, including "global" things (vmalloc) */
+	movl	mmu_cr4_features, %eax
+	movl	%eax, %edx
+	andl	$~(1<<7), %edx;  # PGE
+	movl	%edx, %cr4;  # turn off PGE
+	movl	%cr3, %ecx;  # flush TLB
+	movl	%ecx, %cr3
+	movl	%eax, %cr4;  # turn PGE back on
+
+	movl saved_context_esp, %esp
+	movl saved_context_ebp, %ebp
+	movl saved_context_ebx, %ebx
+	movl saved_context_esi, %esi
+	movl saved_context_edi, %edi
+
+	pushl saved_context_eflags ; popfl
+
+	xorl	%eax, %eax
+
+	ret
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386