1207 files changed, 60311 insertions, 21027 deletions
diff --git a/.mailmap b/.mailmap
index dfab12f809e..eba9bf953ef 100644
--- a/.mailmap
+++ b/.mailmap
@@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
 Koushik <raghavendra.koushik@neterion.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
+Mark Brown <broonie@sirena.org.uk>
 Matthieu CASTET <castet.matthieu@free.fr>
 Michael Buesch <mb@bu3sch.de>
 Michael Buesch <mbuesch@freenet.de>
diff --git a/CREDITS b/CREDITS
index 4fad6717e56..b50db176755 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1658,14 +1658,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintenance.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e586416..ae15d55350e 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
     </listitem>
     <listitem>
      <para>
-      Function names as strings (__FUNCTION__).
+      Function names as strings (__func__).
      </para>
     </listitem>
     <listitem>
diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c154..256defd7e17 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
 MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
 table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA.  A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA.  A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
 
 5.3.2 API pci_enable_msix
 
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c5..fd4907a2968 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
 	o class and classmask fields default to 0
 	o driver_data defaults to 0UL.
 
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
 Once added, the driver probe routine will be invoked for any unclaimed
 PCI devices listed in its (newly updated) pci_ids list.
 
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c8..ddeb14beacc 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
 
 3.3 helper functions
 
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 pci_enable_pcie_error_reporting enables the device to send error
 messages to root port when an error is detected. Note that devices
 don't enable the error reporting by default, so device drivers need
 call this function to enable it.
 
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 pci_disable_pcie_error_reporting disables the device to send error
 messages to root port when an error is detected.
 
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
 error status register.
 
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d9014aa0eb6..d9014aa0eb6 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644
index 00000000000..c50ab58b72e
--- /dev/null
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -0,0 +1,99 @@
+	The cgroup freezer is useful to batch job management systems which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+	The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+	Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells:
+
+	$ echo $$
+	16644
+	$ bash
+	$ echo $$
+	16690
+
+	From a second, unrelated bash shell:
+	$ kill -SIGSTOP 16690
+	$ kill -SIGCONT 16690
+
+	<at this point 16690 exits and causes 16644 to exit too>
+
+	This happens because bash can observe both signals and choose how it
+responds to them.
+
+	Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+	In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+	The freezer subsystem in the container filesystem defines a file named
+freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
+cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
+Reading will return the current state.
+
+* Examples of usage :
+
+   # mkdir /containers
+   # mount -t cgroup -ofreezer freezer  /containers
+   # mkdir /containers/0
+   # echo $some_pid > /containers/0/tasks
+
+to get status of the freezer subsystem :
+
+   # cat /containers/0/freezer.state
+   THAWED
+
+to freeze all tasks in the container :
+
+   # echo FROZEN > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   FREEZING
+   # cat /containers/0/freezer.state
+   FROZEN
+
+to unfreeze all tasks in the container :
+
+   # echo THAWED > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   THAWED
+
+This is the basic mechanism which should do the right thing for user space tasks
+in a simple scenario.
+
+It's important to note that freezing can be incomplete. In that case we return
+EBUSY. This means that some tasks in the cgroup are busy doing something that
+prevents us from completely freezing the cgroup at this time. After EBUSY,
+the cgroup will remain partially frozen -- reflected by freezer.state reporting
+"FREEZING" when read. The state will remain "FREEZING" until one of these
+things happens:
+
+	1) Userspace cancels the freezing operation by writing "THAWED" to
+		the freezer.state file
+	2) Userspace retries the freezing operation by writing "FROZEN" to
+		the freezer.state file (writing "FREEZING" is not legal
+		and returns EIO)
+	3) The tasks that blocked the cgroup from entering the "FROZEN"
+		state disappear from the cgroup's set of tasks.
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 9b53d582736..1c07547d3f8 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
 
 2.2.1 Accounting details
 
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(Some pages which are never reclaimable and will not be on the global LRU
+ are not accounted. We just account pages under usual VM management.)
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+A RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-lru because our purpose is to control the amount
+of used pages. Not-on-lru pages tend to be out of control from the VM's view.
 
 2.3 Shared Page Accounting
 
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 47e568a9370..5c86c258c79 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
 Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup.txt.
+Documentation/cgroups/cgroups.txt.
 
 Requests by a task, using the sched_setaffinity(2) system call to
 include CPUs in its CPU affinity mask, and using the mbind(2) and
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 295f26cd895..9dd2a3bb2ac 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -96,6 +96,11 @@ errors=remount-ro(*)	Remount the filesystem read-only on an error.
 errors=continue		Keep going on a filesystem error.
 errors=panic		Panic and halt the machine if an error occurs.
 
+data_err=ignore(*)	Just print an error message if an error occurs
+			in a file data buffer in ordered mode.
+data_err=abort		Abort the journal if an error occurs in a file
+			data buffer in ordered mode.
+
 grpid			Give objects the same group ID as their creator.
 bsdgroups
 
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c032bf39e8b..bcceb99b81d 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
 dirty_background_ratio
 ----------------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-the pdflush background writeback daemon will start writing out dirty data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which the pdflush background writeback daemon will start writing out
+dirty data.
 
 dirty_ratio
 -----------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-a process which is generating disk writes will itself start writing out dirty
-data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which a process which is generating disk writes will itself start
+writing out dirty data.
 
 dirty_writeback_centisecs
 -------------------------
@@ -2412,24 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
 of memory types. If a bit of the bitmask is set, memory segments of the
 corresponding memory type are dumped, otherwise they are not dumped.
 
-The following 4 memory types are supported:
+The following 7 memory types are supported:
   - (bit 0) anonymous private memory
   - (bit 1) anonymous shared memory
   - (bit 2) file-backed private memory
   - (bit 3) file-backed shared memory
   - (bit 4) ELF header pages in file-backed private memory areas (it is
             effective only if the bit 2 is cleared)
+  - (bit 5) hugetlb private memory
+  - (bit 6) hugetlb shared memory
 
   Note that MMIO pages such as frame buffer are never dumped and vDSO pages
   are always dumped regardless of the bitmask status.
 
-Default value of coredump_filter is 0x3; this means all anonymous memory
-segments are dumped.
+  Note that bits 0-4 don't affect hugetlb memory. hugetlb memory is only
+  affected by bits 5-6.
+
+Default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
 
 If you don't want to dump all shared memory segments attached to pid 1234,
-write 1 to the process's proc file.
+write 0x21 to the process's proc file.
 
-  $ echo 0x1 > /proc/1234/coredump_filter
+  $ echo 0x21 > /proc/1234/coredump_filter
 
 When a new process is created, the process inherits the bitmask status from its
 parent. It is useful to set up coredump_filter before the program runs.
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 6a0d70a22f0..dd84ea3c10d 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -86,6 +86,15 @@ norm_unmount (*)	commit on unmount; the journal is committed
 fast_unmount		do not commit on unmount; this option makes
 			unmount faster, but the next mount slower
 			because of the need to replay the journal.
+bulk_read		read more in one go to take advantage of flash
+			media that read faster sequentially
+no_bulk_read (*)	do not bulk-read
+no_chk_data_crc		skip checking of CRCs on data nodes in order to
+			improve read performance. Use this option only
+			if the flash media is highly reliable. The effect
+			of this option is that corruption of the contents
+			of a file can go unnoticed.
+chk_data_crc (*)	do not skip checking CRCs on data nodes
 
 
 Quick usage instructions
diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt
new file mode 100644
index 00000000000..c61a99f7c8b
--- /dev/null
+++ b/Documentation/ia64/xen.txt
@@ -0,0 +1,183 @@
+       Recipe for getting/building/running Xen/ia64 with pv_ops
+       --------------------------------------------------------
+
+This recipe describes how to get xen-ia64 source and build it,
+and run domU with pv_ops.
+
+============
+Requirements
+============
+
+  - python
+  - mercurial
+    Mercurial (aka "hg") is an open-source source code
+    management tool. See:
+    http://www.selenic.com/mercurial/wiki/
+  - git
+  - bridge-utils
+
+=================================
+Getting and Building Xen and Dom0
+=================================
+
+  My environment is;
+    Machine  : Tiger4
+    Domain0 OS  : RHEL5
+    DomainU OS  : RHEL5
+
+ 1. Download source
+    # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
+    # cd xen-unstable.hg
+    # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
+
+ 2. # make world
+
+ 3. # make install-tools
+
+ 4. copy kernels and xen
+    # cp xen/xen.gz /boot/efi/efi/redhat/
+    # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
+      /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
+
+ 5. make initrd for Dom0/DomU
+    # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
+      O=$(/bin/pwd)/build-linux-2.6.18-xen_ia64
+    # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
+      2.6.18.8-xen --builtin mptspi --builtin mptbase \
+      --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
+      --builtin ehci-hcd
+
+================================
+Making a disk image for guest OS
+================================
+
+ 1. make file
+    # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
+    # mke2fs -F -j /root/rhel5.img
+    # mount -o loop /root/rhel5.img /mnt
+    # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
+    # mkdir /mnt/{root,proc,sys,home,tmp}
+
+    Note: You may miss some device files. If so, please create them
+    with mknod. Or you can use tar instead of cp.
+
+ 2. modify DomU's fstab
+    # vi /mnt/etc/fstab
+       /dev/xvda1  /            ext3    defaults        1 1
+       none        /dev/pts     devpts  gid=5,mode=620  0 0
+       none        /dev/shm     tmpfs   defaults        0 0
+       none        /proc        proc    defaults        0 0
+       none        /sys         sysfs   defaults        0 0
+
+ 3. modify inittab
+    set runlevel to 3 to avoid X trying to start
+    # vi /mnt/etc/inittab
+       id:3:initdefault:
+    Start a getty on the hvc0 console
+       X0:2345:respawn:/sbin/mingetty hvc0
+    tty1-6 mingetty can be commented out
+
+ 4. add hvc0 into /etc/securetty
+    # vi /mnt/etc/securetty (add hvc0)
+
+ 5. umount
+    # umount /mnt
+
+FYI, virt-manager can also make a disk image for a guest OS.
+It is a GUI tool and makes creating the image easy.
+
+==================
+Boot Xen & Domain0
+==================
+
+ 1. replace elilo
+    elilo of RHEL5 can boot Xen and Dom0.
+    If you use old elilo (e.g RHEL4), please download from the below
+    http://elilo.sourceforge.net/cgi-bin/blosxom
+    and copy into /boot/efi/efi/redhat/
+    # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
+
+ 2. modify elilo.conf (like the below)
+    # vi /boot/efi/efi/redhat/elilo.conf
+     prompt
+     timeout=20
+     default=xen
+     relocatable
+
+     image=vmlinuz-2.6.18.8-xen
+             label=xen
+             vmm=xen.gz
+             initrd=initrd-2.6.18.8-xen.img
+             read-only
+             append=" -- rhgb root=/dev/sda2"
+
+The append options before "--" are for xen hypervisor,
+the options after "--" are for dom0.
+
+FYI, your machine may need console options like
+"com1=19200,8n1 console=vga,com1". For example,
+append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
+console=ttyS0 root=/dev/sda2"
+
+=====================================
+Getting and Building domU with pv_ops
+=====================================
+
+ 1. get pv_ops tree
+    # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
+
+ 2. git branch (if necessary)
+    # cd linux-2.6-xen-ia64/
+    # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
+    (Note: The current branch is xen-ia64-domu-minimal-2008may19,
+    but you may find a newer branch. Run
+    "git branch -r" to list the available branches.
+    http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
+    is also available. The tree is based on
+    git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
+
+
+ 3. copy .config for pv_ops of domU
+    # cp arch/ia64/configs/xen_domu_wip_defconfig .config
+
+ 4. make kernel with pv_ops
+    # make oldconfig
+    # make
+
+ 5. install the kernel and initrd
+    # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
+    # make modules_install
+    # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
+      2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
+      --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
+      --builtin ohci-hcd --builtin ehci-hcd
+
+========================
+Boot DomainU with pv_ops
+========================
+
+ 1. make config of DomU
+   # vi /etc/xen/rhel5
+     kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
+     ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
+     vcpus = 1
+     memory = 512
+     name = "rhel5"
+     disk = [ 'file:/root/rhel5.img,xvda1,w' ]
+     root = "/dev/xvda1 ro"
+     extra= "rhgb console=hvc0"
+
+ 2. After booting xen and dom0, start xend
+   # /etc/init.d/xend start
+   ( In the debugging case, # XEND_DEBUG=1 xend trace_start )
+
+ 3. start domU
+   # xm create -c rhel5
+
+=========
+Reference
+=========
+- Wiki of Xen/IA64 upstream merge
+  http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
+
+Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d4f4875fc7c..53ba7c7d82b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
 			Documentation/x86_64/boot-options.txt .
+	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
 
 In addition, the following text indicates that the option:
 
@@ -690,7 +691,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See Documentation/block/as-iosched.txt and
 			Documentation/block/deadline-iosched.txt for details.
 
-	elfcorehdr=	[X86-32, X86_64]
+	elfcorehdr=	[IA64,PPC,SH,X86-32,X86_64]
 			Specifies physical address of start of kernel core
 			image elf header. Generally kexec loader will
 			pass this option to capture kernel.
@@ -796,6 +797,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Defaults to the default architecture's huge page size
 			if not specified.
 
+	hlt		[BUGS=ARM,SH]
+
 	i8042.debug	[HW] Toggle i8042 debug mode
 	i8042.direct	[HW] Put keyboard port into non-translated mode
 	i8042.dumbkbd	[HW] Pretend that controller can only read data from
@@ -1211,6 +1214,10 @@ and is between 256 and 4096 characters. It is defined in the file
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
 
+	memchunk=nn[KMG]
+			[KNL,SH] Allow user to override the default size for
+			per-device physically contiguous DMA buffers.
+
 	memmap=exactmap	[KNL,X86-32,X86_64] Enable setting of an exact
 			E820 memory map, as specified by the user.
 			Such memmap=exactmap lines can be constructed based on
@@ -1393,6 +1400,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
 
+	nodsp		[SH] Disable hardware DSP at boot time.
+
 	noefi		[X86-32,X86-64] Disable EFI runtime services support.
 
 	noexec		[IA-64]
@@ -1409,13 +1418,15 @@ and is between 256 and 4096 characters. It is defined in the file
 			noexec32=off: disable non-executable mappings
 				read implies executable mappings
 
+	nofpu		[SH] Disable hardware FPU at boot time.
+
 	nofxsr		[BUGS=X86-32] Disables x86 floating point extended
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
 	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
 
-	nohlt		[BUGS=ARM]
+	nohlt		[BUGS=ARM,SH]
 
 	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
 			instruction doesn't work correctly and not to
@@ -1578,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See also Documentation/paride.txt.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
-		off		[X86-32] don't probe for the PCI bus
+		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
 				has a non-standard PCI host bridge.
@@ -1586,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
 				hardware access methods are allowed. Use this
 				if you experience crashes upon bootup and you
 				suspect they are caused by the BIOS.
-		conf1		[X86-32] Force use of PCI Configuration
+		conf1		[X86] Force use of PCI Configuration
 				Mechanism 1.
-		conf2		[X86-32] Force use of PCI Configuration
+		conf2		[X86] Force use of PCI Configuration
 				Mechanism 2.
 		noaer		[PCIE] If the PCIEAER kernel config parameter is
 				enabled, this kernel boot option can be used to
@@ -1608,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
 				this option if the kernel is unable to allocate
 				IRQs or discover secondary PCI buses on your
 				motherboard.
-		rom		[X86-32] Assign address space to expansion ROMs.
+		rom		[X86] Assign address space to expansion ROMs.
 				Use with caution as certain devices share
 				address decoders between ROMs and other
 				resources.
-		norom		[X86-32,X86_64] Do not assign address space to
+		norom		[X86] Do not assign address space to
 				expansion ROMs that do not already have
 				BIOS assigned address ranges.
-		irqmask=0xMMMM	[X86-32] Set a bit mask of IRQs allowed to be
+		irqmask=0xMMMM	[X86] Set a bit mask of IRQs allowed to be
 				assigned automatically to PCI devices. You can
 				make the kernel exclude IRQs of your ISA cards
 				this way.
-		pirqaddr=0xAAAAA	[X86-32] Specify the physical address
+		pirqaddr=0xAAAAA	[X86] Specify the physical address
 				of the PIRQ table (normally generated
 				by the BIOS) if it is outside the
 				F0000h-100000h range.
-		lastbus=N	[X86-32] Scan all buses thru bus #N. Can be
+		lastbus=N	[X86] Scan all buses thru bus #N. Can be
 				useful if the kernel is unable to find your
 				secondary buses and you want to tell it
 				explicitly which ones they are.
-		assign-busses	[X86-32] Always assign all PCI bus
+		assign-busses	[X86] Always assign all PCI bus
 				numbers ourselves, overriding
 				whatever the firmware may have done.
-		usepirqmask	[X86-32] Honor the possible IRQ mask stored
+		usepirqmask	[X86] Honor the possible IRQ mask stored
 				in the BIOS $PIR table. This is needed on
 				some systems with broken BIOSes, notably
 				some HP Pavilion N5400 and Omnibook XE3
 				notebooks. This will have no effect if ACPI
 				IRQ routing is enabled.
-		noacpi		[X86-32] Do not use ACPI for IRQ routing
+		noacpi		[X86] Do not use ACPI for IRQ routing
 				or for PCI scanning.
-		use_crs		[X86-32] Use _CRS for PCI resource
+		use_crs		[X86] Use _CRS for PCI resource
 				allocation.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
@@ -1667,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
 
+	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
+			Management.
+		off	Disable ASPM.
+		force	Enable ASPM even on devices that claim not to support it.
+			WARNING: Forcing ASPM on may cause system lockups.
+
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0..089f6138fcd 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644
index 00000000000..bdf93b7f0f2
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.txt
@@ -0,0 +1,714 @@
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+layman's terms (and apologies to all the pros in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the number of 1 bits in the data over which the parity is calculated is
+odd, and the parity bit = 0 if that number is even. So the total number of
+1 bits in the data over which the parity is calculated + the parity bit is
+even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
+byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
+byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
+byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
+byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
+....
+byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
+byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
+           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
+           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
+           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+cp2 is the parity over bit0, bit1, bit4 and bit5
+cp3 is the parity over bit2, bit3, bit6 and bit7.
+cp4 is the parity over bit0, bit1, bit2 and bit3.
+cp5 is the parity over bit4, bit5, bit6 and bit7.
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+(so handle two bytes, then skip 2 bytes).
+rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+The story now becomes quite boring. I guess you get the idea.
+rp6 covers 8 bytes then skips 8 etc
+rp7 skips 8 bytes then covers 8 etc
+rp8 covers 16 bytes then skips 16 etc
+rp9 skips 16 bytes then covers 16 etc
+rp10 covers 32 bytes then skips 32 etc
+rp11 skips 32 bytes then covers 32 etc
+rp12 covers 64 bytes then skips 64 etc
+rp13 skips 64 bytes then covers 64 etc
+rp14 covers 128 bytes then skips 128
+rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
+ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
+ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode:
+for (i = 0; i < 256; i++)
+{
+    if (i & 0x01)
+       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    else
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
+    if (i & 0x02)
+       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+    else
+       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+    if (i & 0x04)
+      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+    else
+      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+    if (i & 0x08)
+      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+    else
+      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+    if (i & 0x10)
+      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+    else
+      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+    if (i & 0x20)
+      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+    else
+    rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+    if (i & 0x40)
+      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+    else
+      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+    if (i & 0x80)
+      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+    else
+      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3;
+    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4;
+    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5;
+}
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the if loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+const char parity[256] = {
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+void ecc1(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned char *bp = buf;
+    unsigned char cur;
+    unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned char par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 256; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+        if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+        if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+    }
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain. no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate rp0 and rp1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two bytes and rp2 the last two bytes.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must be taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+extern const char parity[256];
+
+void ecc2(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned long *bp = (unsigned long *)buf;
+    unsigned long cur;
+    unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned long par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 64; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+    }
+    /*
+       we need to adapt the code generation for the fact that rp vars are now
+       long; also the column parity calculation needs to be changed.
+       we'll bring rp4 to 15 back to single byte entities by shifting and
+       xoring
+    */
+    rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+    rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+    rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+    rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+    rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+    rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+    rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+    rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+    rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+    rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+    rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+    rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+    rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+    rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+    par ^= (par >> 16);
+    rp1 = (par >> 8); rp1 &= 0xff;
+    rp0 = (par & 0xff);
+    par ^= (par >> 8); par &= 0xff;
+
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+There is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14).  That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced:
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+with
+        if (i & 0x01) rp5 ^= cur;
+        if (i & 0x02) rp7 ^= cur;
+        if (i & 0x04) rp9 ^= cur;
+        if (i & 0x08) rp11 ^= cur;
+        if (i & 0x10) rp13 ^= cur;
+        if (i & 0x20) rp15 ^= cur;
+
+        and outside the loop added:
+    rp4  = par ^ rp5;
+    rp6  = par ^ rp7;
+    rp8  = par ^ rp9;
+    rp10  = par ^ rp11;
+    rp12  = par ^ rp13;
+    rp14  = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallelism
+or so. I also tried on an eeePC (Celeron, clocked at 900 MHz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code than my 3 GHz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        rp4 ^= cur;
+        rp6 ^= cur;
+        rp8 ^= cur;
+        rp10 ^= cur;
+        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+        cur = *bp++;
+        par ^= cur;
+        rp5 ^= cur;
+        rp6 ^= cur;
+        ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+Unrolling twice keeps the gain at about 15%
+Unrolling three times gives a gain of 30% compared to attempt 2.
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively all odd-numbered rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like:
+    rp5 = par ^ rp4;
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+The code within the for loop was changed to:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+	    par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements it is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements:
+rp4 ^= cur; rp6 ^= cur;
+It seems more efficient to also maintain a variable rp4_6 in the while
+loop; This eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding:
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+	    notrp8 = tmppar;
+	    cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+	    rp8 = rp8 ^ tmppar ^ notrp8;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+	    par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like
+    code[0] |= (code[0] << 1);
+Let's test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with:
+rp7 ^= (rp7 << 4);
+rp7 ^= (rp7 << 2);
+rp7 ^= (rp7 << 1);
+rp7 &= 0x80;
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. On my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+On  a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa06..10a0263ebb3 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,9 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running timers.
+'q'     - Will dump per CPU lists of all armed hrtimers (but NOT regular
+          timer_list timers) and detailed information about all
+          clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644
index 00000000000..5d354e16749
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+	             Using the Linux Kernel Tracepoints
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adding a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+	TPPROTO(int firstarg, struct task_struct *p),
+	TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+	...
+	trace_subsys_eventname(arg, task);
+	...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname().  Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe and make sure
+there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45..5bbbe209622 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644
index 00000000000..125eed560e5
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.txt
@@ -0,0 +1,615 @@
+
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages.  The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation.  The latter design rationale is
+discussed in the context of an implementation description.  Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code.  One hopes that the descriptions below add value by providing the
+answer to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan.  This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux.  The problems have been observed at customer sites on large
+memory x86_64 systems.  For example, a non-NUMA x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone.  When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable.  This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+The Unevictable LRU List
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list.  The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
+Kconfig option.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+1) We get to "treat unevictable pages just like we treat other pages in the
+   system, which means we get to use the same code to manipulate them, the
+   same code to isolate them (for migrate, etc.), the same code to keep track
+   of the statistics, etc..." [Rik van Riel]
+
+2) We want to be able to migrate unevictable pages between nodes--for memory
+   defragmentation, workload management and memory hotplug.  The linux kernel
+   can only migrate pages that it can successfully isolate from the lru lists.
+   If we were to maintain pages elsewise than on an lru-like list, where they
+   can be found by isolate_lru_page(), we would prevent their migration, unless
+   we reworked migration code to find the unevictable pages.
+
+
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages.  This differentiation is only important while the pages
+are, in fact, evictable.
+
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock.  The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
+
+
+Unevictable LRU and Memory Controller Interaction
+
+The memory controller data structure automatically gets a per zone unevictable
+lru list as a result of the "arrayification" of the per-zone LRU lists.  The
+memory controller tracks the movement of pages to and from the unevictable list.
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list.  This has a couple of
+effects.  Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed.  On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory.  This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU:  Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not.  For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function.  Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code.  AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed.  Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock().   So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim.  On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable.  If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked via the PageMlocked() wrapper.  If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
+VM_LOCKED via is_mlocked_vma().  is_mlocked_vma() will SetPageMlocked() and
+update the appropriate statistics if the vma is VM_LOCKED.  This method allows
+efficient "culling" of pages in the fault path that are being faulted in to
+VM_LOCKED vmas.
+
+
+Unevictable Pages and Vmscan [shrink_*_list()]
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list.  However, there may be situations where we
+decide, for the sake of expediency, to leave an unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with.  Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked.  Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
+
+Note that for anonymous pages, shrink_page_list() attempts to add the page to
+the swap cache before it tries to unmap the page.  To avoid this unnecessary
+consumption of swap space, shrink_page_list() calls try_to_munlock() to check
+whether any VM_LOCKED vmas map the page without attempting to unmap the page.
+If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
+without consuming swap space.  try_to_munlock() will be described below.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock.  Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list.  If the page has become evictable, putback_lru_page() removes it from
+the list and retries, including the page_evictable() test.  Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictability checks should not occur in the majority of calls
+to putback_lru_page().
+
+
+Mlocked Page:  Prior Work
+
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph
+Lameter to achieve the same objective--hiding mlocked pages from vmscan.
+In Nick's patch, he used one of the struct page lru list link fields as a count
+of VM_LOCKED vmas that map the page.  This use of the link field for a count
+prevented the management of the pages on an LRU list.  Thus, mlocked pages were
+not migratable as isolate_lru_page() could not find them and the lru list link
+field was not available to the migration subsystem.  Nick resolved this by
+putting mlocked pages back on the lru list before attempting to isolate them,
+thus abandoning the count of VM_LOCKED vmas.  When Nick's patch was integrated
+with the Unevictable LRU work, the count was replaced by walking the reverse
+map to determine whether any VM_LOCKED vmas mapped the page.  More on this
+below.
+
+
+Mlocked Pages:  Basic Management
+
+Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
+unevictable pages.  When such a page has been "noticed" by the memory
+management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
+flag.  A PageMlocked() page will be placed on the unevictable LRU list when
+it is added to the LRU.   Pages can be "noticed" by memory management in
+several places:
+
+1) in the mlock()/mlockall() system call handlers.
+2) in the mmap() system call handler when mmap()ing a region with the
+   MAP_LOCKED flag, or mmap()ing a region in a task that has called
+   mlockall() with the MCL_FUTURE flag.  Both of these conditions result
+   in the VM_LOCKED flag being set for the vma.
+3) in the fault path, if mlocked pages are "culled" in the fault path,
+   and when a VM_LOCKED stack segment is expanded.
+4) as mentioned above, in vmscan:shrink_page_list() when attempting to
+   reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
+
+Mlocked pages become unlocked and rescued from the unevictable list when:
+
+1) mapped in a range unlocked via the munlock()/munlockall() system calls.
+2) munmap()ed out of the last VM_LOCKED vma that maps the page, including
+   unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages:  mlock()/mlockall() System Call Handling
+
+Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+for each vma in the range specified by the call.  In the case of mlockall(),
+this is the entire active address space of the task.  Note that mlock_fixup()
+is used for both mlock()ing and munlock()ing a range of memory.  A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering described in "Mlocked Pages:  Filtering Special
+Vmas" below, mlock_fixup() will attempt to merge the vma with its neighbors or
+split off a subset of the vma if the range does not cover the entire vma.  Once
+the vma has been merged or split or neither, mlock_fixup() will call
+__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
+to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE.  In this case,
+get_user_pages() will be unable to fault in the pages.  That's OK.  If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it.  To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock.  If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page().  If the mapping is gone, we just
+unlock the page and move on.  Worst case, this results in a page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked().  Again, vmscan will detect and cull such pages.
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages().  We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work.  We especially do not want to
+count an mlocked page more than once in the statistics.  If the page was
+already mlocked, mlock_vma_page() is done.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time.  If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list.  If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if/when it attempts to reclaim the page.
+
+
+Mlocked Pages:  Filtering Special Vmas
+
+mlock_fixup() filters several classes of "special" vmas:
+
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely.  The pages behind
+   these mappings are inherently pinned, so we don't need to mark them as
+   mlocked.  In any case, most of the pages have no struct page in which to
+   so mark the page.  Because of this, get_user_pages() will fail for these
+   vmas, so there is no sense in attempting to visit them.
+
+2) vmas mapping hugetlbfs pages are already effectively pinned into memory.
+   We don't need nor want to mlock() these pages.  However, to preserve the
+   prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
+   will call make_pages_present() in the hugetlbfs vma range to allocate the
+   huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+   kernel pages, such as the vdso page, relay channel pages, etc.  These pages
+   are inherently unevictable and are not managed on the LRU lists.
+   mlock_fixup() treats these vmas the same as hugetlbfs vmas.  It calls
+   make_pages_present() to populate the ptes.
+
+Note that for all of these special vmas, mlock_fixup() does not set the
+VM_LOCKED flag.  Therefore, we won't have to deal with them later during
+munlock() or munmap()--for example, at task exit.  Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
+
+Mlocked Pages:  Downgrading the Mmap Semaphore.
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas.  However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the region's requirements.  Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task.  It can also hold off scans of the task's
+address space via /proc.  While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range().  This works fine.  However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semaphore to write mode.  Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode.  In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped.  Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range.  If not, mlock_fixup() returns an error [-EAGAIN].  All callers of
+mlock_fixup() have been changed to deal with this new error condition.
+
+Note:  when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlocking() and munlocking() regions.  So,
+unlocking should not have to wait for page allocations nor faults of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages:  munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument.  So, these system calls are also
+handled by mlock_fixup().  Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns.  Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas.  So, these vmas will be
+ignored for munlock.
+
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range.  The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
+passing a flag to indicate that munlock() is being performed.
+
+Because the vma access protections could have been changed to PROT_NONE after
+faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
+these pages for munlocking.  Because we don't want to leave pages mlocked(),
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
+
+For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+munlock_vma_page().  munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked().  As with mlock_vma_page(), munlock_vma_page()
+uses the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task.  If the page was mlocked,
+munlock_vma_page() updates the zone statistics for the number of mlocked
+pages.  Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an lru list.  [More on these below.]  However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have.  If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another vma holding the page mlocked.  If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page.  This should be relatively rare.
+
+Mlocked Pages:  Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page.  Linux supports migration of mlocked pages and other unevictable
+pages.  This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page.  This has been discussed from the mlock/munlock perspective in the
+respective sections above.  Both processes [migration, m[un]locking], hold
+the page locked.  This provides the first level of synchronization.  Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock.  The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released.  To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages:  mmap(MAP_LOCKED) System Call Handling
+
+In addition to the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
+call.  Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked.  Before the unevictable/mlock changes,
+the kernel simply called make_pages_present() to allocate pages and populate
+the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+mlock_vma_pages_range() specifying the vma and the address range to mlock.
+mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
+"Mlocked Pages:  Filtering Special Vmas".  It will clear the VM_LOCKED flag,
+which will have already been set by the caller, in filtered vmas.  Thus these
+vmas need not be visited for munlock when the region is unmapped.
+
+For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
+fault/allocate the pages and mlock them.  Again, like mlock_fixup(),
+mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
+attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
+back to write mode before returning.
+
+The callers of mlock_vma_pages_range() will have already added the memory
+range to be mlocked to the task's "locked_vm".  To account for filtered vmas,
+mlock_vma_pages_range() returns the number of pages NOT mlocked.  All of the
+callers then subtract a non-negative return value from the task's locked_vm.
+A negative return value represents an error--for example, from get_user_pages()
+attempting to fault in a vma with PROT_NONE access.  In this case, we leave
+the memory range accounted as locked_vm, as the protections could be changed
+later and pages allocated into that region.
+
+
+Mlocked Pages:  munmap()/exit()/exec() System Call Handling
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
+so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() handler and task address space tear down function call
+munlock_vma_pages_all().  The name reflects the observation that one always
+specifies the entire vma range when munlock()ing during unmap of a region.
+Because of the vma filtering when mlocking() regions, only "normal" vmas that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the vma's memory range and munlock_vma_page() each resident page mapped by
+the vma.  This effectively munlocks the page, only if this is the last
+VM_LOCKED vma that maps the page.
+
+
+Mlocked Page:  try_to_unmap()
+
+[Note:  the code changes represented by this section are really quite small
+compared to the text to describe what is happening and why, and to discuss the
+implications.]
+
+Pages can, of course, be mapped into multiple vmas.  Some of these vmas may
+have VM_LOCKED flag set.  It is possible for a page mapped into one or more
+VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists.  This could happen if, for example, a
+task in the process of munlock()ing the page could not isolate the page from
+the LRU.  As a result, vmscan/shrink_page_list() might encounter such a page
+as described in "Unevictable Pages and Vmscan [shrink_*_list()]".  To
+handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
+vmas while it is walking a page's reverse map.
+
+try_to_unmap() is always called, by either vmscan for reclaim or for page
+migration, with the argument page locked and isolated from the LRU.  BUG_ON()
+assertions enforce this requirement.  Separate functions handle anonymous and
+mapped file pages, as these types of pages have different reverse map
+mechanisms.
+
+	try_to_unmap_anon()
+
+To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
+visited--at least until a VM_LOCKED vma is encountered.  If the page is being
+unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
+pages are migratable.  However, for reclaim, if the page is mapped into a
+VM_LOCKED vma, the scan stops.  try_to_unmap() attempts to acquire the mmap
+semaphore of the mm_struct to which the vma belongs in read mode.  If this is
+successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
+wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
+will return SWAP_MLOCK, indicating that the page is unevictable.  If the
+mmap semaphore cannot be acquired, we are not sure whether the page is really
+unevictable or not.  In this case, try_to_unmap() will return SWAP_AGAIN.
+
+	try_to_unmap_file() -- linear mappings
+
+Unmapping of a mapped file page works the same, except that the scan visits
+all vmas that map the page's index/page offset in the page's mapping's
+reverse map priority search tree.  It must also visit each vma in the page's
+mapping's non-linear list, if the list is non-empty.  As for anonymous pages,
+on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
+attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
+returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
+
+	try_to_unmap_file() -- non-linear mappings
+
+If a page's mapping contains a non-empty non-linear mapping vma list, then
+try_to_un{map|lock}() must also visit each vma in that list to determine
+whether the page is mapped in a VM_LOCKED vma.  Again, the scan must visit
+all vmas in the non-linear list to ensure that the page is not/should not be
+mlocked.  If a VM_LOCKED vma is found in the list, the scan could terminate.
+However, there is no easy way to determine whether the page is actually mapped
+in a given vma--either for unmapping or testing whether the VM_LOCKED vma
+actually pins the page.
+
+So, try_to_unmap_file() handles non-linear mappings by scanning a certain
+number of pages--a "cluster"--in each non-linear vma associated with the page's
+mapping, for each file mapped page that vmscan tries to unmap.  If this happens
+to unmap the page we're trying to unmap, try_to_unmap() will notice this on
+return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS.  Otherwise, it
+will return SWAP_AGAIN, causing vmscan to recirculate this page.  We take
+advantage of the cluster scan in try_to_unmap_cluster() as follows:
+
+For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
+semaphore of the associated mm_struct for read without blocking.  If this
+attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
+retain the mmap semaphore for the scan; otherwise it drops it here.  Then,
+for each page in the cluster, if we're holding the mmap semaphore for a locked
+vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page.  This
+call is a no-op if the page is already locked, but will mlock any pages in
+the non-linear mapping that happen to be unlocked.  If one of the pages so
+mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
+return SWAP_MLOCK, rather than the default SWAP_AGAIN.  This will allow vmscan
+to cull the page, rather than recirculating it on the inactive list.  Again,
+if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
+SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
+couldn't be mlocked.
+
+
+Mlocked pages:  try_to_munlock() Reverse Map Scan
+
+TODO/FIXME:  a better name might be page_mlocked()--analogous to the
+page_referenced() reverse map walker--especially if we continue to call this
+from shrink_page_list().  See related TODO/FIXME below.
+
+When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() System
+Call Handling" above--tries to munlock a page, or when shrink_page_list()
+encounters an anonymous page that is not yet in the swap cache, they need to
+determine whether or not the page is mapped by any VM_LOCKED vma, without
+actually attempting to unmap all ptes from the page.  For this purpose, the
+unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
+try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file pages with an additional argument specifying unlock versus unmap
+processing.  Again, these functions walk the respective reverse maps looking
+for VM_LOCKED vmas.  When such a vma is found for anonymous pages and file
+pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
+attempt to acquire the associated mmap semaphore, mlock the page via
+mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the
+pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
+shrink_page_list() that the anonymous page should be culled rather than added
+to the swap cache in preparation for a try_to_unmap() that will almost
+certainly fail.
+
+If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
+semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list()
+to recycle the page on the inactive list and hope that it has better luck
+with the page next time.
+
+For file pages mapped into non-linear vmas, the try_to_munlock() logic works
+slightly differently.  On encountering a VM_LOCKED non-linear vma that might
+map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
+the page.  munlock_vma_page() will just leave the page unlocked and let
+vmscan deal with it--the usual fallback position.
+
+Note that try_to_munlock()'s reverse map walk must visit every vma in a page's
+reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
+However, the scan can terminate when it encounters a VM_LOCKED vma and can
+successfully acquire the vma's mmap semaphore for read and mlock the page.
+Although try_to_munlock() can be called many [very many!] times when
+munlock()ing a large region or tearing down a large address space that has been
+mlocked via mlockall(), overall this is a fairly rare event.  In addition,
+although shrink_page_list() calls try_to_munlock() for every anonymous page that
+it handles that is not yet in the swap cache, on average anonymous pages will
+have very short reverse map lists.
+
+Mlocked Pages:  Page Reclaim in shrink_*_list()
+
+shrink_active_list() culls any obviously unevictable pages--i.e.,
+!page_evictable(page, NULL)--diverting these to the unevictable lru
+list.  However, shrink_active_list() only sees unevictable pages that
+made it onto the active/inactive lru lists.  Note that these pages do not
+have PageUnevictable set--otherwise, they would be on the unevictable list and
+shrink_active_list would never see them.
+
+Some examples of these unevictable pages on the LRU lists are:
+
+1) ramfs pages that have been placed on the lru lists when first allocated.
+
+2) SHM_LOCKed shared memory pages.  shmctl(SHM_LOCK) does not attempt to
+   allocate or fault in the pages in the shared memory region.  This happens
+   when an application accesses the page the first time after SHM_LOCKing
+   the segment.
+
+3) Mlocked pages that could not be isolated from the lru and moved to the
+   unevictable list in mlock_vma_page().
+
+4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
+   acquire the vma's mmap semaphore to test the flags and set PageMlocked.
+   munlock_vma_page() was forced to let the page back on to the normal
+   LRU list for vmscan to handle.
+
+shrink_inactive_list() also culls any unevictable pages that it finds
+on the inactive lists, again diverting them to the appropriate zone's unevictable
+lru list.  shrink_inactive_list() should only see SHM_LOCKed pages that became
+SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
+pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
+the lru to recheck via try_to_munlock().  shrink_inactive_list() won't notice
+the latter, but will pass on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reasons to shrink_inactive_list().  As already discussed,
+shrink_page_list() proactively looks for anonymous pages that should have
+PG_mlocked set but don't--these would not be detected by page_evictable()--to
+avoid adding them to the swap cache unnecessarily.  File pages mapped into
+VM_LOCKED vmas but without PG_mlocked set will make it all the way to
+try_to_unmap().  shrink_page_list() will divert them to the unevictable list when
+try_to_unmap() returns SWAP_MLOCK, as discussed above.
+
+TODO/FIXME:  If we can enhance the swap cache to reliably remove entries
+with page_count(page) > 2, as long as all ptes are mapped to the page and
+not the swap entry, we can probably remove the call to try_to_munlock() in
+shrink_page_list() and just remove the page from the swap cache when
+try_to_unmap() returns SWAP_MLOCK.   Currently, remove_exclusive_swap_page()
+doesn't seem to allow that.
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 20852862104..a2afc494de4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1204,7 +1204,7 @@ S:	Maintained
 
 CPU FREQUENCY DRIVERS
 P:	Dave Jones
-M:	davej@codemonkey.org.uk
+M:	davej@redhat.com
 L:	cpufreq@vger.kernel.org
 W:	http://www.codemonkey.org.uk/projects/cpufreq/
 T:	git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
@@ -2934,9 +2934,9 @@ S:	Maintained
 
 NETEFFECT IWARP RNIC DRIVER (IW_NES)
 P:	Faisal Latif
-M:	flatif@neteffect.com
+M:	faisal.latif@intel.com
 P:	Chien Tung
-M:	ctung@neteffect.com
+M:	chien.tin.tung@intel.com
 L:	general@lists.openfabrics.org
 W:	http://www.neteffect.com
 S:	Supported
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index a0f642b6a4b..6110197757a 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
 	default y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 
 menu "System setup"
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 15fda434442..d069526bd76 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_UAC_SIGBUS		7
 #define TIF_MEMDIE		8
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
 #define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED)
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 04dcc5e5d4c..9cd8dca742a 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 
 	case 0x71:					/* RTC_PORT(1) */
 		rtc_access.index = index;
-		rtc_access.data = BCD_TO_BIN(b);
+		rtc_access.data = bcd2bin(b);
 		rtc_access.function = 0x48 + !write;	/* GET/PUT_TOY */
 
 #ifdef CONFIG_SMP
@@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 #else
 		__marvel_access_rtc(&rtc_access);
 #endif
-		ret = BIN_TO_BCD(rtc_access.data);
+		ret = bin2bcd(rtc_access.data);
 		break;
 
 	default:
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13..a4555f49763 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
 	long i;
 
-	for (i = 0; i < nr_irqs; ++i) {
+	for (i = 0; i < nr_of_irqs; ++i) {
 		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
 		irq_desc[i].chip = &sable_lynx_irq_type;
 	}
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 75480cab089..e6a231435cb 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -346,12 +346,12 @@ time_init(void)
 	year = CMOS_READ(RTC_YEAR);
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	/* PC-like is standard; used for year >= 70 */
@@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e1521f32e55..f504c801792 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -192,6 +192,8 @@ config VECTORS_BASE
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type"
 
 choice
@@ -354,7 +356,7 @@ config ARCH_IXP4XX
 	select GENERIC_GPIO
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
-	select ZONE_DMA if PCI
+	select DMABOUNCE if PCI
 	help
 	  Support for Intel's IXP4XX (XScale) family of processors.
 
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 2e32acca02f..86b5e698266 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -13,10 +13,10 @@ config ICST307
 config SA1111
 	bool
 	select DMABOUNCE if !ARCH_PXA
-	select ZONE_DMA if !ARCH_PXA
 
 config DMABOUNCE
 	bool
+	select ZONE_DMA
 
 config TIMER_ACORN
 	bool
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index fb86f248aab..47ccec95f3e 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -581,6 +581,7 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
 		goto out;
 	}
 
+#ifdef CONFIG_DMABOUNCE
 	/*
 	 * If the parent device has a DMA mask associated with it,
 	 * propagate it down to the children.
@@ -598,6 +599,7 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
 			}
 		}
 	}
+#endif
 
 out:
 	return ret;
@@ -937,7 +939,7 @@ static int sa1111_resume(struct platform_device *dev)
 #define sa1111_resume  NULL
 #endif
 
-static int sa1111_probe(struct platform_device *pdev)
+static int __devinit sa1111_probe(struct platform_device *pdev)
 {
 	struct resource *mem;
 	int irq;
diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig
index 8b7a431a8bf..9033d147f05 100644
--- a/arch/arm/configs/trizeps4_defconfig
+++ b/arch/arm/configs/trizeps4_defconfig
@@ -147,6 +147,7 @@ CONFIG_ARCH_PXA=y
 # CONFIG_MACH_MAINSTONE is not set
 # CONFIG_ARCH_PXA_IDP is not set
 # CONFIG_PXA_SHARPSL is not set
+CONFIG_TRIZEPS_PXA=y
 CONFIG_MACH_TRIZEPS4=y
 CONFIG_MACH_TRIZEPS4_CONXS=y
 # CONFIG_MACH_TRIZEPS4_ANY is not set
diff --git a/arch/arm/mach-clps711x/include/mach/memory.h b/arch/arm/mach-clps711x/include/mach/memory.h
index 71c2fa70c8e..98ec30c97bb 100644
--- a/arch/arm/mach-clps711x/include/mach/memory.h
+++ b/arch/arm/mach-clps711x/include/mach/memory.h
@@ -89,6 +89,8 @@
  * 	node 3:  0xd8000000 - 0xdfffffff
  */
 #define NODE_MEM_SIZE_BITS	24
+#define SECTION_SIZE_BITS	24
+#define MAX_PHYSMEM_BITS	32
 
 #endif
 
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7ca..d6d52527589 100644
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
 		return 1200000000;
 	default:
 		printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
 		return 4;
 	default:
 		printk("%s: warning unknown ratio, defaulting to 2\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 2;
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159..30451300751 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
 	.unmask	= ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
 	unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
 	board_irq_stat = stat_reg;
 	board_irq_mask = mask_reg;
-	board_irq_count = nr_irqs;
+	board_irq_count = nr_of_irqs;
 
 	*board_irq_mask = 0xffffffff;
 
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index db8b5fe06c0..2c5a02b8520 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -167,11 +167,6 @@ config MACH_GTWX5715
 
 comment "IXP4xx Options"
 
-config DMABOUNCE
-	bool
-	default y
-	depends on PCI
-
 config IXP4XX_INDIRECT_PCI
 	bool "Use indirect PCI memory access"
 	depends on PCI
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 85cad05d8c5..0bb1fbd84cc 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -16,6 +16,7 @@
 #include <linux/mv643xx_eth.h>
 #include <linux/ata_platform.h>
 #include <linux/spi/orion_spi.h>
+#include <net/dsa.h>
 #include <asm/page.h>
 #include <asm/timex.h>
 #include <asm/mach/map.h>
@@ -152,6 +153,40 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 
 
 /*****************************************************************************
+ * Ethernet switch
+ ****************************************************************************/
+static struct resource kirkwood_switch_resources[] = {
+	{
+		.start	= 0,
+		.end	= 0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device kirkwood_switch_device = {
+	.name		= "dsa",
+	.id		= 0,
+	.num_resources	= 0,
+	.resource	= kirkwood_switch_resources,
+};
+
+void __init kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq)
+{
+	if (irq != NO_IRQ) {
+		kirkwood_switch_resources[0].start = irq;
+		kirkwood_switch_resources[0].end = irq;
+		kirkwood_switch_device.num_resources = 1;
+	}
+
+	d->mii_bus = &kirkwood_ge00_shared.dev;
+	d->netdev = &kirkwood_ge00.dev;
+	kirkwood_switch_device.dev.platform_data = d;
+
+	platform_device_register(&kirkwood_switch_device);
+}
+
+
+/*****************************************************************************
  * SoC RTC
  ****************************************************************************/
 static struct resource kirkwood_rtc_resource = {
diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h
index 8fa0f6a2763..5774632a67e 100644
--- a/arch/arm/mach-kirkwood/common.h
+++ b/arch/arm/mach-kirkwood/common.h
@@ -11,6 +11,7 @@
 #ifndef __ARCH_KIRKWOOD_COMMON_H
 #define __ARCH_KIRKWOOD_COMMON_H
 
+struct dsa_platform_data;
 struct mv643xx_eth_platform_data;
 struct mv_sata_platform_data;
 
@@ -29,6 +30,7 @@ void kirkwood_pcie_id(u32 *dev, u32 *rev);
 
 void kirkwood_ehci_init(void);
 void kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data);
+void kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq);
 void kirkwood_pcie_init(void);
 void kirkwood_rtc_init(void);
 void kirkwood_sata_init(struct mv_sata_platform_data *sata_data);
diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c
index f785093e433..175054abd63 100644
--- a/arch/arm/mach-kirkwood/rd88f6281-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c
@@ -19,6 +19,7 @@
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/pci.h>
@@ -74,6 +75,15 @@ static struct mv643xx_eth_platform_data rd88f6281_ge00_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_platform_data rd88f6281_switch_data = {
+	.port_names[0]	= "lan1",
+	.port_names[1]	= "lan2",
+	.port_names[2]	= "lan3",
+	.port_names[3]	= "lan4",
+	.port_names[4]	= "wan",
+	.port_names[5]	= "cpu",
+};
+
 static struct mv_sata_platform_data rd88f6281_sata_data = {
 	.n_ports	= 2,
 };
@@ -87,6 +97,7 @@ static void __init rd88f6281_init(void)
 
 	kirkwood_ehci_init();
 	kirkwood_ge00_init(&rd88f6281_ge00_data);
+	kirkwood_ge00_switch_init(&rd88f6281_switch_data, NO_IRQ);
 	kirkwood_rtc_init();
 	kirkwood_sata_init(&rd88f6281_sata_data);
 	kirkwood_uart0_init();
diff --git a/arch/arm/mach-mv78xx0/db78x00-bp-setup.c b/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
index 49f434c39eb..2e285bbb7bb 100644
--- a/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
+++ b/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
+#include <linux/ethtool.h>
 #include <mach/mv78xx0.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -28,10 +29,14 @@ static struct mv643xx_eth_platform_data db78x00_ge01_data = {
 
 static struct mv643xx_eth_platform_data db78x00_ge10_data = {
 	.phy_addr	= MV643XX_ETH_PHY_NONE,
+	.speed		= SPEED_1000,
+	.duplex		= DUPLEX_FULL,
 };
 
 static struct mv643xx_eth_platform_data db78x00_ge11_data = {
 	.phy_addr	= MV643XX_ETH_PHY_NONE,
+	.speed		= SPEED_1000,
+	.duplex		= DUPLEX_FULL,
 };
 
 static struct mv_sata_platform_data db78x00_sata_data = {
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index d354e0fe447..c40fc378a25 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-	unsigned long nr_irqs = 0;
+	unsigned long nr_of_irqs = 0;
 	unsigned int nr_banks = 0;
 	int i;
 
@@ -133,14 +133,14 @@ void __init omap_init_irq(void)
 
 		omap_irq_bank_init_one(bank);
 
-		nr_irqs += bank->nr_irqs;
+		nr_of_irqs += bank->nr_irqs;
 		nr_banks++;
 	}
 
 	printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-	       nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+	       nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip(i, &omap_irq_chip);
 		set_irq_handler(i, handle_level_irq);
 		set_irq_flags(i, IRQF_VALID);
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 9625ef5975d..437065c25c9 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -19,6 +19,7 @@
 #include <linux/mv643xx_i2c.h>
 #include <linux/ata_platform.h>
 #include <linux/spi/orion_spi.h>
+#include <net/dsa.h>
 #include <asm/page.h>
 #include <asm/setup.h>
 #include <asm/timex.h>
@@ -198,6 +199,40 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
 
 
 /*****************************************************************************
+ * Ethernet switch
+ ****************************************************************************/
+static struct resource orion5x_switch_resources[] = {
+	{
+		.start	= 0,
+		.end	= 0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device orion5x_switch_device = {
+	.name		= "dsa",
+	.id		= 0,
+	.num_resources	= 0,
+	.resource	= orion5x_switch_resources,
+};
+
+void __init orion5x_eth_switch_init(struct dsa_platform_data *d, int irq)
+{
+	if (irq != NO_IRQ) {
+		orion5x_switch_resources[0].start = irq;
+		orion5x_switch_resources[0].end = irq;
+		orion5x_switch_device.num_resources = 1;
+	}
+
+	d->mii_bus = &orion5x_eth_shared.dev;
+	d->netdev = &orion5x_eth.dev;
+	orion5x_switch_device.dev.platform_data = d;
+
+	platform_device_register(&orion5x_switch_device);
+}
+
+
+/*****************************************************************************
  * I2C
  ****************************************************************************/
 static struct mv64xxx_i2c_pdata orion5x_i2c_pdata = {
@@ -275,7 +310,8 @@ void __init orion5x_sata_init(struct mv_sata_platform_data *sata_data)
  * SPI
  ****************************************************************************/
 static struct orion_spi_info orion5x_spi_plat_data = {
-	.tclk		= 0,
+	.tclk			= 0,
+	.enable_clock_fix	= 1,
 };
 
 static struct resource orion5x_spi_resources[] = {
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 1f8b2da676a..a000c7c6ee9 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -1,6 +1,7 @@
 #ifndef __ARCH_ORION5X_COMMON_H
 #define __ARCH_ORION5X_COMMON_H
 
+struct dsa_platform_data;
 struct mv643xx_eth_platform_data;
 struct mv_sata_platform_data;
 
@@ -29,6 +30,7 @@ void orion5x_setup_pcie_wa_win(u32 base, u32 size);
 void orion5x_ehci0_init(void);
 void orion5x_ehci1_init(void);
 void orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data);
+void orion5x_eth_switch_init(struct dsa_platform_data *d, int irq);
 void orion5x_i2c_init(void);
 void orion5x_sata_init(struct mv_sata_platform_data *sata_data);
 void orion5x_spi_init(void);
diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
index 500cdadaf09..15f53235ee3 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
@@ -16,6 +16,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/gpio.h>
 #include <asm/leds.h>
@@ -93,6 +94,15 @@ static struct mv643xx_eth_platform_data rd88f5181l_fxo_eth_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_platform_data rd88f5181l_fxo_switch_data = {
+	.port_names[0]	= "lan2",
+	.port_names[1]	= "lan1",
+	.port_names[2]	= "wan",
+	.port_names[3]	= "cpu",
+	.port_names[5]	= "lan4",
+	.port_names[7]	= "lan3",
+};
+
 static void __init rd88f5181l_fxo_init(void)
 {
 	/*
@@ -107,6 +117,7 @@ static void __init rd88f5181l_fxo_init(void)
 	 */
 	orion5x_ehci0_init();
 	orion5x_eth_init(&rd88f5181l_fxo_eth_data);
+	orion5x_eth_switch_init(&rd88f5181l_fxo_switch_data, NO_IRQ);
 	orion5x_uart0_init();
 
 	orion5x_setup_dev_boot_win(RD88F5181L_FXO_NOR_BOOT_BASE,
diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
index ebde8141649..8ad3934399d 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
@@ -17,6 +17,7 @@
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
 #include <linux/i2c.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/gpio.h>
 #include <asm/leds.h>
@@ -94,6 +95,15 @@ static struct mv643xx_eth_platform_data rd88f5181l_ge_eth_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_platform_data rd88f5181l_ge_switch_data = {
+	.port_names[0]	= "lan2",
+	.port_names[1]	= "lan1",
+	.port_names[2]	= "wan",
+	.port_names[3]	= "cpu",
+	.port_names[5]	= "lan4",
+	.port_names[7]	= "lan3",
+};
+
 static struct i2c_board_info __initdata rd88f5181l_ge_i2c_rtc = {
 	I2C_BOARD_INFO("ds1338", 0x68),
 };
@@ -112,6 +122,7 @@ static void __init rd88f5181l_ge_init(void)
 	 */
 	orion5x_ehci0_init();
 	orion5x_eth_init(&rd88f5181l_ge_eth_data);
+	orion5x_eth_switch_init(&rd88f5181l_ge_switch_data, gpio_to_irq(8));
 	orion5x_i2c_init();
 	orion5x_uart0_init();
 
diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
index 40e04953909..262e25e4dac 100644
--- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
@@ -19,6 +19,7 @@
 #include <linux/spi/orion_spi.h>
 #include <linux/spi/flash.h>
 #include <linux/ethtool.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/gpio.h>
 #include <asm/leds.h>
@@ -34,6 +35,15 @@ static struct mv643xx_eth_platform_data rd88f6183ap_ge_eth_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_platform_data rd88f6183ap_ge_switch_data = {
+	.port_names[0]	= "lan1",
+	.port_names[1]	= "lan2",
+	.port_names[2]	= "lan3",
+	.port_names[3]	= "lan4",
+	.port_names[4]	= "wan",
+	.port_names[5]	= "cpu",
+};
+
 static struct mtd_partition rd88f6183ap_ge_partitions[] = {
 	{
 		.name	= "kernel",
@@ -79,6 +89,7 @@ static void __init rd88f6183ap_ge_init(void)
 	 */
 	orion5x_ehci0_init();
 	orion5x_eth_init(&rd88f6183ap_ge_eth_data);
+	orion5x_eth_switch_init(&rd88f6183ap_ge_switch_data, gpio_to_irq(3));
 	spi_register_board_info(rd88f6183ap_ge_spi_slave_info,
 				ARRAY_SIZE(rd88f6183ap_ge_spi_slave_info));
 	orion5x_spi_init();
diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
index 9a4fd525646..cc8f8920086 100644
--- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c
+++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
@@ -15,6 +15,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/gpio.h>
 #include <asm/mach/arch.h>
@@ -105,6 +106,15 @@ static struct mv643xx_eth_platform_data wrt350n_v2_eth_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_platform_data wrt350n_v2_switch_data = {
+	.port_names[0]	= "lan2",
+	.port_names[1]	= "lan1",
+	.port_names[2]	= "wan",
+	.port_names[3]	= "cpu",
+	.port_names[5]	= "lan3",
+	.port_names[7]	= "lan4",
+};
+
 static void __init wrt350n_v2_init(void)
 {
 	/*
@@ -119,6 +129,7 @@ static void __init wrt350n_v2_init(void)
 	 */
 	orion5x_ehci0_init();
 	orion5x_eth_init(&wrt350n_v2_eth_data);
+	orion5x_eth_switch_init(&wrt350n_v2_switch_data, NO_IRQ);
 	orion5x_uart0_init();
 
 	orion5x_setup_dev_boot_win(WRT350N_V2_NOR_BOOT_BASE,
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index f27f6b3d6e6..f781873431f 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -257,7 +257,6 @@ config MACH_ARMCORE
 	bool "CompuLab CM-X255/CM-X270 modules"
 	select PXA27x
 	select IWMMXT
-	select ZONE_DMA if PCI
 	select PXA25x
 	select PXA_SSP
 
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index 9c163e19ada..32bb4a2eb7f 100644
--- a/arch/arm/mach-pxa/include/mach/irqs.h
+++ b/arch/arm/mach-pxa/include/mach/irqs.h
@@ -9,7 +9,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#ifndef __ASM_MACH_IRQS_H
+#define __ASM_MACH_IRQS_H
 
 #ifdef CONFIG_PXA_HAVE_ISA_IRQS
 #define PXA_ISA_IRQ(x)	(x)
@@ -264,3 +265,5 @@
 #endif
 
 #endif /* CONFIG_PCI_HOST_ITE8152 */
+
+#endif /* __ASM_MACH_IRQS_H */
diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index eb4b190b665..eb35fca9aea 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -4,6 +4,43 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
+struct pxa3xx_nand_timing {
+	unsigned int	tCH;  /* Enable signal hold time */
+	unsigned int	tCS;  /* Enable signal setup time */
+	unsigned int	tWH;  /* ND_nWE high duration */
+	unsigned int	tWP;  /* ND_nWE pulse time */
+	unsigned int	tRH;  /* ND_nRE high duration */
+	unsigned int	tRP;  /* ND_nRE pulse width */
+	unsigned int	tR;   /* ND_nWE high to ND_nRE low for read */
+	unsigned int	tWHR; /* ND_nWE high to ND_nRE low for status read */
+	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
+};
+
+struct pxa3xx_nand_cmdset {
+	uint16_t	read1;
+	uint16_t	read2;
+	uint16_t	program;
+	uint16_t	read_status;
+	uint16_t	read_id;
+	uint16_t	erase;
+	uint16_t	reset;
+	uint16_t	lock;
+	uint16_t	unlock;
+	uint16_t	lock_status;
+};
+
+struct pxa3xx_nand_flash {
+	const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
+	const struct pxa3xx_nand_cmdset *cmdset;
+
+	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
+	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
+	uint32_t flash_width;	/* Width of Flash memory (DWIDTH_M) */
+	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
+	uint32_t num_blocks;	/* Number of physical blocks in Flash */
+	uint32_t chip_id;
+};
+
 struct pxa3xx_nand_platform_data {
 
 	/* the data flash bus is shared between the Static Memory
@@ -12,8 +49,11 @@ struct pxa3xx_nand_platform_data {
 	 */
 	int	enable_arbiter;
 
-	struct mtd_partition *parts;
-	unsigned int	nr_parts;
+	const struct mtd_partition		*parts;
+	unsigned int				nr_parts;
+
+	const struct pxa3xx_nand_flash * 	flash;
+	size_t					num_flash;
 };
 
 extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
diff --git a/arch/arm/mach-pxa/include/mach/spitz.h b/arch/arm/mach-pxa/include/mach/spitz.h
index 31ac26b55bc..e8488dfb7e9 100644
--- a/arch/arm/mach-pxa/include/mach/spitz.h
+++ b/arch/arm/mach-pxa/include/mach/spitz.h
@@ -142,7 +142,7 @@
 
 #define SPITZ_SCP2_GPIO_BASE		(NR_BUILTIN_GPIO + 12)
 #define SPITZ_GPIO_IR_ON		(SPITZ_SCP2_GPIO_BASE + 0)
-#define SPITZ_GPIO_AKIN_PULLUP		(SPITZ_SCP2_GPIO_BASE + 1
+#define SPITZ_GPIO_AKIN_PULLUP		(SPITZ_SCP2_GPIO_BASE + 1)
 #define SPITZ_GPIO_RESERVED_1		(SPITZ_SCP2_GPIO_BASE + 2)
 #define SPITZ_GPIO_RESERVED_2		(SPITZ_SCP2_GPIO_BASE + 3)
 #define SPITZ_GPIO_RESERVED_3		(SPITZ_SCP2_GPIO_BASE + 4)
diff --git a/arch/arm/mach-pxa/include/mach/tosa.h b/arch/arm/mach-pxa/include/mach/tosa.h
index a72803f0461..8bce6d8615b 100644
--- a/arch/arm/mach-pxa/include/mach/tosa.h
+++ b/arch/arm/mach-pxa/include/mach/tosa.h
@@ -59,8 +59,6 @@
  * TC6393XB GPIOs
  */
 #define TOSA_TC6393XB_GPIO_BASE		(NR_BUILTIN_GPIO + 2 * 12)
-#define TOSA_TC6393XB_GPIO(i)		(TOSA_TC6393XB_GPIO_BASE + (i))
-#define TOSA_TC6393XB_GPIO_BIT(gpio)	(1 << (gpio - TOSA_TC6393XB_GPIO_BASE))
 
 #define TOSA_GPIO_TG_ON			(TOSA_TC6393XB_GPIO_BASE + 0)
 #define TOSA_GPIO_L_MUTE		(TOSA_TC6393XB_GPIO_BASE + 1)
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731..bf6785adccf 100644
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
 static inline void zylonite_pxa300_init(void)
 {
 	if (cpu_is_pxa300() || cpu_is_pxa310())
-		panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+		panic("%s: PXA300/PXA310 not supported\n", __func__);
 }
 #endif
 
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
 static inline void zylonite_pxa320_init(void)
 {
 	if (cpu_is_pxa320())
-		panic("%s: PXA320 not supported\n", __FUNCTION__);
+		panic("%s: PXA320 not supported\n", __func__);
 }
 #endif
 
diff --git a/arch/arm/mach-pxa/pwm.c b/arch/arm/mach-pxa/pwm.c
index 316cd986da5..74e2ead8cee 100644
--- a/arch/arm/mach-pxa/pwm.c
+++ b/arch/arm/mach-pxa/pwm.c
@@ -60,7 +60,7 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 	do_div(c, 1000000000);
 	period_cycles = c;
 
-	if (period_cycles < 0)
+	if (period_cycles < 1)
 		period_cycles = 1;
 	prescale = (period_cycles - 1) / 1024;
 	pv = period_cycles / (prescale + 1) - 1;
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 130e37e4ebd..a6c4694359c 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -706,16 +706,39 @@ static struct tmio_nand_data tosa_tc6393xb_nand_config = {
 	.badblock_pattern = &tosa_tc6393xb_nand_bbt,
 };
 
-static struct tc6393xb_platform_data tosa_tc6393xb_setup = {
+static int tosa_tc6393xb_setup(struct platform_device *dev)
+{
+	int rc;
+
+	rc = gpio_request(TOSA_GPIO_CARD_VCC_ON, "CARD_VCC_ON");
+	if (rc)
+		goto err_req;
+
+	rc = gpio_direction_output(TOSA_GPIO_CARD_VCC_ON, 1);
+	if (rc)
+		goto err_dir;
+
+	return rc;
+
+err_dir:
+	gpio_free(TOSA_GPIO_CARD_VCC_ON);
+err_req:
+	return rc;
+}
+
+static void tosa_tc6393xb_teardown(struct platform_device *dev)
+{
+	gpio_free(TOSA_GPIO_CARD_VCC_ON);
+}
+
+static struct tc6393xb_platform_data tosa_tc6393xb_data = {
 	.scr_pll2cr	= 0x0cc1,
 	.scr_gper	= 0x3300,
-	.scr_gpo_dsr	=
-		TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
-	.scr_gpo_doecr	=
-		TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
 
 	.irq_base	= IRQ_BOARD_START,
 	.gpio_base	= TOSA_TC6393XB_GPIO_BASE,
+	.setup		= tosa_tc6393xb_setup,
+	.teardown	= tosa_tc6393xb_teardown,
 
 	.enable		= tosa_tc6393xb_enable,
 	.disable	= tosa_tc6393xb_disable,
@@ -723,6 +746,8 @@ static struct tc6393xb_platform_data tosa_tc6393xb_setup = {
 	.resume		= tosa_tc6393xb_resume,
 
 	.nand_data	= &tosa_tc6393xb_nand_config,
+
+	.resume_restore = 1,
 };
 
 
@@ -730,7 +755,7 @@ static struct platform_device tc6393xb_device = {
 	.name	= "tc6393xb",
 	.id	= -1,
 	.dev	= {
-		.platform_data	= &tosa_tc6393xb_setup,
+		.platform_data	= &tosa_tc6393xb_data,
 	},
 	.num_resources	= ARRAY_SIZE(tc6393xb_resources),
 	.resource	= tc6393xb_resources,
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index a13dbf3c2c0..a72e3add743 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -399,7 +399,7 @@ static void trizeps4_irda_transceiver_mode(struct device *dev, int mode)
 	/* Switch mode */
 	if (mode & IR_SIRMODE)
 		trizeps_conxs_ircr &= ~ConXS_IRCR_MODE;	/* Slow mode */
-	else if (mode & IR_FIRMODE) {
+	else if (mode & IR_FIRMODE)
 		trizeps_conxs_ircr |= ConXS_IRCR_MODE;	/* Fast mode */
 
 	/* Switch power */
diff --git a/arch/arm/mach-s3c2443/clock.c b/arch/arm/mach-s3c2443/clock.c
index 2f60bf6b8d4..f854e7385e3 100644
--- a/arch/arm/mach-s3c2443/clock.c
+++ b/arch/arm/mach-s3c2443/clock.c
@@ -1033,8 +1033,7 @@ void __init s3c2443_init_clocks(int xtal)
 
 	fclk = pll / s3c2443_fclk_div(clkdiv0);
 	hclk = s3c2443_prediv_getrate(&clk_prediv);
-	hclk = hclk / s3c2443_get_hdiv(clkdiv0);
-	hclk = hclk / ((clkdiv0 & S3C2443_CLKDIV0_HALF_HCLK) ? 2 : 1);
+	hclk /= s3c2443_get_hdiv(clkdiv0);
  	pclk = hclk / ((clkdiv0 & S3C2443_CLKDIV0_HALF_PCLK) ? 2 : 1);
 
 	s3c24xx_setup_clocks(xtal, fclk, hclk, pclk);
diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644
index 4c99c8f5e61..00000000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-				       unsigned long ctrl_port, int *irq)
-{
-	unsigned long reg = data_port;
-	int i;
-	int regincr = 1;
-
-	/* The Empeg board has the first two address lines unused */
-	if (machine_is_empeg())
-		regincr = 1 << 2;
-
-	/* The LART doesn't use A0 for IDE */
-	if (machine_is_lart())
-		regincr = 1 << 1;
-
-	memset(hw, 0, sizeof(*hw));
-
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = reg;
-		reg += regincr;
-	}
-
-	hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq)
-		*irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-    if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-        hw_regs_t hw;
-
-        /* Enable GPIO as interrupt line */
-        GPDR &= ~LART_GPIO_IDE;
-	set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-        /* set PCMCIA interface timing */
-        MECR = 0x00060006;
-
-        /* init the interface */
-	ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-        hw.irq = LART_IRQ_IDE;
-        ide_register_hw(&hw);
-#endif
-    }
-}
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 33926c9fcda..5786adf1004 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -29,7 +29,7 @@ ENTRY(v4_flush_user_cache_all)
  *	Clean and invalidate the entire cache.
  */
 ENTRY(v4_flush_kern_cache_all)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
 	mov	pc, lr
@@ -48,7 +48,7 @@ ENTRY(v4_flush_kern_cache_all)
  *	- flags	- vma_area_struct flags describing address space
  */
 ENTRY(v4_flush_user_cache_range)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
 	mov	ip, #0
 	mcreq	p15, 0, ip, c7, c7, 0		@ flush ID cache
 	mov	pc, lr
@@ -116,7 +116,7 @@ ENTRY(v4_dma_inv_range)
  *	- end	 - virtual end address
  */
 ENTRY(v4_dma_flush_range)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
 #endif
diff --git a/arch/arm/plat-mxc/include/mach/mxc_nand.h b/arch/arm/plat-mxc/include/mach/mxc_nand.h
new file mode 100644
index 00000000000..2b972df22d1
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/mxc_nand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_NAND_H
+#define __ASM_ARCH_NAND_H
+
+struct mxc_nand_platform_data {
+	int width;	/* data bus width in bytes */
+	int hw_ecc;	/* 0 to suppress hardware ECC */
+};
+#endif /* __ASM_ARCH_NAND_H */
diff --git a/arch/arm/plat-omap/include/mach/onenand.h b/arch/arm/plat-omap/include/mach/onenand.h
index d57f20226b2..4649d302c26 100644
--- a/arch/arm/plat-omap/include/mach/onenand.h
+++ b/arch/arm/plat-omap/include/mach/onenand.h
@@ -16,6 +16,10 @@ struct omap_onenand_platform_data {
 	int			gpio_irq;
 	struct mtd_partition	*parts;
 	int			nr_parts;
-	int                     (*onenand_setup)(void __iomem *);
+	int                     (*onenand_setup)(void __iomem *, int freq);
 	int			dma_channel;
 };
+
+int omap2_onenand_rephase(void);
+
+#define ONENAND_MAX_PARTITIONS 8
diff --git a/arch/arm/plat-s3c24xx/pwm-clock.c b/arch/arm/plat-s3c24xx/pwm-clock.c
index b8e854f1b1d..3fad68a1e6b 100644
--- a/arch/arm/plat-s3c24xx/pwm-clock.c
+++ b/arch/arm/plat-s3c24xx/pwm-clock.c
@@ -315,7 +315,7 @@ static int clk_pwm_tin_set_parent(struct clk *clk, struct clk *parent)
 	if (parent == s3c24xx_pwmclk_tclk(id))
 		bits = S3C2410_TCFG1_MUX_TCLK << shift;
 	else if (parent == s3c24xx_pwmclk_tdiv(id))
-		bits = clk_pwm_tdiv_bits(to_tdiv(clk)) << shift;
+		bits = clk_pwm_tdiv_bits(to_tdiv(parent)) << shift;
 	else
 		return -EINVAL;
 
diff --git a/arch/arm/plat-s3c24xx/pwm.c b/arch/arm/plat-s3c24xx/pwm.c
index feb770f2e84..ec56b88866c 100644
--- a/arch/arm/plat-s3c24xx/pwm.c
+++ b/arch/arm/plat-s3c24xx/pwm.c
@@ -56,7 +56,7 @@ static struct clk *clk_scaler[2];
 		}					\
 	}
 
-#define DEFINE_TIMER(_tmr_no, _irq)			\
+#define DEFINE_S3C_TIMER(_tmr_no, _irq)			\
 	.name		= "s3c24xx-pwm",		\
 	.id		= _tmr_no,			\
 	.num_resources	= TIMER_RESOURCE_SIZE,		\
@@ -67,11 +67,11 @@ static struct clk *clk_scaler[2];
  */
 
 struct platform_device s3c_device_timer[] = {
-	[0] = { DEFINE_TIMER(0, IRQ_TIMER0) },
-	[1] = { DEFINE_TIMER(1, IRQ_TIMER1) },
-	[2] = { DEFINE_TIMER(2, IRQ_TIMER2) },
-	[3] = { DEFINE_TIMER(3, IRQ_TIMER3) },
-	[4] = { DEFINE_TIMER(4, IRQ_TIMER4) },
+	[0] = { DEFINE_S3C_TIMER(0, IRQ_TIMER0) },
+	[1] = { DEFINE_S3C_TIMER(1, IRQ_TIMER1) },
+	[2] = { DEFINE_S3C_TIMER(2, IRQ_TIMER2) },
+	[3] = { DEFINE_S3C_TIMER(3, IRQ_TIMER3) },
+	[4] = { DEFINE_S3C_TIMER(4, IRQ_TIMER4) },
 };
 
 static inline int pwm_is_tdiv(struct pwm_device *pwm)
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 7c239a91627..33a5b2969eb 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -72,6 +72,8 @@ config GENERIC_BUG
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type and features"
 
 source "kernel/time/Kconfig"
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 294b25f9323..4442f8d2d42 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
 
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f..310477ba1bb 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	struct eic *eic;
 	struct resource *regs;
 	unsigned int i;
-	unsigned int nr_irqs;
+	unsigned int nr_of_irqs;
 	unsigned int int_irq;
 	int ret;
 	u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	eic_writel(eic, IDR, ~0UL);
 	eic_writel(eic, MODE, ~0UL);
 	pattern = eic_readl(eic, MODE);
-	nr_irqs = fls(pattern);
+	nr_of_irqs = fls(pattern);
 
 	/* Trigger on low level unless overridden by driver */
 	eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
 	eic->chip = &eic_chip;
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
 					 handle_level_irq);
 		set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
 		 eic->regs, int_irq);
 	dev_info(&pdev->dev,
 		 "Handling %u external IRQs, starting with IRQ %u\n",
-		 nr_irqs, eic->first_irq);
+		 nr_of_irqs, eic->first_irq);
 
 	return 0;
 
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 8102c79aaa9..29e71ed6b8a 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -64,8 +64,11 @@ config HARDWARE_PM
 	depends on OPROFILE
 
 source "init/Kconfig"
+
 source "kernel/Kconfig.preempt"
 
+source "kernel/Kconfig.freezer"
+
 menu "Blackfin Processor Options"
 
 comment "Processor and Board Settings"
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index cb66c4da25c..b17aeea8d62 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -62,6 +62,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General setup"
 
 source "fs/Kconfig.binfmt"
diff --git a/arch/cris/arch-v10/drivers/ds1302.c b/arch/cris/arch-v10/drivers/ds1302.c
index c9aa3904be0..3bdfaf43390 100644
--- a/arch/cris/arch-v10/drivers/ds1302.c
+++ b/arch/cris/arch-v10/drivers/ds1302.c
@@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
 	local_irq_restore(flags);
 	
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			else
 				yrs -= 1900;	/* RTC (70, 71, ... 99) */
 
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 
 			local_irq_save(flags);
 			CMOS_WRITE(yrs, RTC_YEAR);
diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c
index 8769dc91407..1e90c1a9c84 100644
--- a/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/arch/cris/arch-v10/drivers/pcf8563.c
@@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
 		       "information is no longer guaranteed!\n", PCF8563_NAME);
 	}
 
-	tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+	tm->tm_year  = bcd2bin(tm->tm_year) +
 		       ((tm->tm_mon & 0x80) ? 100 : 0);
 	tm->tm_sec  &= 0x7F;
 	tm->tm_min  &= 0x7F;
@@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
 	tm->tm_wday &= 0x07; /* Not coded in BCD. */
 	tm->tm_mon  &= 0x1F;
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
 	tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		century = (tm.tm_year >= 2000) ? 0x80 : 0;
 		tm.tm_year = tm.tm_year % 100;
 
-		BIN_TO_BCD(tm.tm_year);
-		BIN_TO_BCD(tm.tm_mon);
-		BIN_TO_BCD(tm.tm_mday);
-		BIN_TO_BCD(tm.tm_hour);
-		BIN_TO_BCD(tm.tm_min);
-		BIN_TO_BCD(tm.tm_sec);
+		tm.tm_year = bin2bcd(tm.tm_year);
+		tm.tm_mon = bin2bcd(tm.tm_mon);
+		tm.tm_mday = bin2bcd(tm.tm_mday);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
+		tm.tm_min = bin2bcd(tm.tm_min);
+		tm.tm_sec = bin2bcd(tm.tm_sec);
 		tm.tm_mon |= century;
 
 		mutex_lock(&rtc_lock);
diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c
index f263ab57122..f4478506e52 100644
--- a/arch/cris/arch-v32/drivers/pcf8563.c
+++ b/arch/cris/arch-v32/drivers/pcf8563.c
@@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
 		       "information is no longer guaranteed!\n", PCF8563_NAME);
 	}
 
-	tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+	tm->tm_year  = bcd2bin(tm->tm_year) +
 		       ((tm->tm_mon & 0x80) ? 100 : 0);
 	tm->tm_sec  &= 0x7F;
 	tm->tm_min  &= 0x7F;
@@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
 	tm->tm_wday &= 0x07; /* Not coded in BCD. */
 	tm->tm_mon  &= 0x1F;
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
 	tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		century = (tm.tm_year >= 2000) ? 0x80 : 0;
 		tm.tm_year = tm.tm_year % 100;
 
-		BIN_TO_BCD(tm.tm_year);
-		BIN_TO_BCD(tm.tm_mon);
-		BIN_TO_BCD(tm.tm_mday);
-		BIN_TO_BCD(tm.tm_hour);
-		BIN_TO_BCD(tm.tm_min);
-		BIN_TO_BCD(tm.tm_sec);
+		tm.tm_year = bin2bcd(tm.tm_year);
+		tm.tm_mon = bin2bcd(tm.tm_mon);
+		tm.tm_mday = bin2bcd(tm.tm_mday);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
+		tm.tm_min = bin2bcd(tm.tm_min);
+		tm.tm_sec = bin2bcd(tm.tm_sec);
 		tm.tm_mon |= century;
 
 		mutex_lock(&rtc_lock);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index ff4c6aa75de..074fe7dea96 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
 		return 0;
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	BCD_TO_BIN(cmos_minutes);
+	cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
 	real_minutes %= 60;
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
-		BIN_TO_BCD(real_seconds);
-		BIN_TO_BCD(real_minutes);
+		real_seconds = bin2bcd(real_seconds);
+		real_minutes = bin2bcd(real_minutes);
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
 	} else {
@@ -170,12 +170,12 @@ get_cmos_time(void)
 	mon = CMOS_READ(RTC_MONTH);
 	year = CMOS_READ(RTC_YEAR);
 
-	BCD_TO_BIN(sec);
-	BCD_TO_BIN(min);
-	BCD_TO_BIN(hour);
-	BCD_TO_BIN(day);
-	BCD_TO_BIN(mon);
-	BCD_TO_BIN(year);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
 
 	if ((year += 1900) < 1970)
 		year += 100;
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a5aac1b0756..9d1552a9ee2 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Fujitsu FR-V system setup"
 
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 7789c3d82d3..28f06fd9b7b 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -90,6 +90,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/h8300/Kconfig.cpu"
 
 menu "Executable file formats"
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index aafd4d322ec..700014d2155 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b7aa38254a..27eec71429b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 config IA64
@@ -21,6 +23,7 @@ config IA64
 	select HAVE_KRETPROBES
 	select HAVE_DMA_ATTRS
 	select HAVE_KVM
+	select HAVE_ARCH_TRACEHOOK
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -108,6 +111,33 @@ config AUDIT_ARCH
 	bool
 	default y
 
+menuconfig PARAVIRT_GUEST
+	bool "Paravirtualized guest support"
+	help
+	  Say Y here to get to see options related to running Linux under
+	  various hypervisors.  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if PARAVIRT_GUEST
+
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	depends on PARAVIRT_GUEST
+	default y
+	bool
+	default y
+	help
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization.  However, when run without a hypervisor
+	  the kernel is theoretically slower and slightly larger.
+
+
+source "arch/ia64/xen/Kconfig"
+
+endif
+
 choice
 	prompt "System type"
 	default IA64_GENERIC
@@ -117,6 +147,7 @@ config IA64_GENERIC
 	select NUMA
 	select ACPI_NUMA
 	select SWIOTLB
+	select PCI_MSI
 	help
 	  This selects the system type of your hardware.  A "generic" kernel
 	  will run on any supported IA-64 system.  However, if you configure
@@ -124,11 +155,13 @@ config IA64_GENERIC
 
 	  generic		For any supported IA-64 system
 	  DIG-compliant		For DIG ("Developer's Interface Guide") compliant systems
+	  DIG+Intel+IOMMU	For DIG systems with Intel IOMMU
 	  HP-zx1/sx1000		For HP systems
 	  HP-zx1/sx1000+swiotlb	For HP systems with (broken) DMA-constrained devices.
 	  SGI-SN2		For SGI Altix systems
 	  SGI-UV		For SGI UV systems
 	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
+	  Xen-domU		For xen domU system
 
 	  If you don't know what to do, choose "generic".
 
@@ -136,6 +169,11 @@ config IA64_DIG
 	bool "DIG-compliant"
 	select SWIOTLB
 
+config IA64_DIG_VTD
+	bool "DIG+Intel+IOMMU"
+	select DMAR
+	select PCI_MSI
+
 config IA64_HP_ZX1
 	bool "HP-zx1/sx1000"
 	help
@@ -179,6 +217,10 @@ config IA64_HP_SIM
 	bool "Ski-simulator"
 	select SWIOTLB
 
+config IA64_XEN_GUEST
+	bool "Xen guest"
+	depends on XEN
+
 endchoice
 
 choice
@@ -581,6 +623,16 @@ source "drivers/pci/hotplug/Kconfig"
 
 source "drivers/pcmcia/Kconfig"
 
+config DMAR
+        bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
+        depends on IA64_GENERIC && ACPI && EXPERIMENTAL
+	help
+	  DMA remapping (DMAR) devices support enables independent address
+	  translations for Direct Memory Access (DMA) from devices.
+	  These DMA remapping devices are reported via ACPI tables
+	  and include PCI device scope covered by these DMA
+	  remapping devices.
+
 endmenu
 
 endif
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 905d25b13d5..58a7e46affd 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -53,12 +53,15 @@ libs-y				+= arch/ia64/lib/
 core-y				+= arch/ia64/kernel/ arch/ia64/mm/
 core-$(CONFIG_IA32_SUPPORT)	+= arch/ia64/ia32/
 core-$(CONFIG_IA64_DIG) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_DIG_VTD) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
+core-$(CONFIG_IA64_XEN_GUEST)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
 core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
+core-$(CONFIG_XEN)		+= arch/ia64/xen/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index 9f483976228..e05f9e1d3fa 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -233,6 +233,8 @@ CONFIG_DMIID=y
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
 
+# CONFIG_DMAR is not set
+
 #
 # Power management and ACPI
 #
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 797acf9066c..c522edf23c6 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -172,6 +172,8 @@ CONFIG_DMIID=y
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
 
+# CONFIG_DMAR is not set
+
 #
 # Power management and ACPI
 #
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
index 971cd7870dd..5c0283830bd 100644
--- a/arch/ia64/dig/Makefile
+++ b/arch/ia64/dig/Makefile
@@ -6,4 +6,9 @@
 #
 
 obj-y := setup.o
+ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o
+else
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
+endif
+obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o
diff --git a/arch/ia64/dig/dig_vtd_iommu.c b/arch/ia64/dig/dig_vtd_iommu.c
new file mode 100644
index 00000000000..1c8a079017a
--- /dev/null
+++ b/arch/ia64/dig/dig_vtd_iommu.c
@@ -0,0 +1,59 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/intel-iommu.h>
+
+void *
+vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		 gfp_t flags)
+{	/* forwards every argument unchanged to intel_alloc_coherent() */
+	return intel_alloc_coherent(dev, size, dma_handle, flags);
+}
+EXPORT_SYMBOL_GPL(vtd_alloc_coherent);
+
+void
+vtd_free_coherent(struct device *dev, size_t size, void *vaddr,
+		 dma_addr_t dma_handle)
+{	/* forwards every argument unchanged to intel_free_coherent() */
+	intel_free_coherent(dev, size, vaddr, dma_handle);
+}
+EXPORT_SYMBOL_GPL(vtd_free_coherent);
+
+dma_addr_t
+vtd_map_single_attrs(struct device *dev, void *addr, size_t size,
+		     int dir, struct dma_attrs *attrs)
+{	/* attrs is unused; addr is passed on as a plain phys_addr_t cast */
+	return intel_map_single(dev, (phys_addr_t)addr, size, dir);
+}	/* NOTE(review): verify no virt-to-phys translation is needed */
+EXPORT_SYMBOL_GPL(vtd_map_single_attrs);
+
+void
+vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+		       int dir, struct dma_attrs *attrs)
+{	/* attrs is unused; the rest is forwarded to intel_unmap_single() */
+	intel_unmap_single(dev, iova, size, dir);
+}
+EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs);
+
+int
+vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+		 int dir, struct dma_attrs *attrs)
+{	/* attrs is unused; returns intel_map_sg()'s result as-is */
+	return intel_map_sg(dev, sglist, nents, dir);
+}
+EXPORT_SYMBOL_GPL(vtd_map_sg_attrs);
+
+void
+vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+		   int nents, int dir, struct dma_attrs *attrs)
+{	/* attrs is unused; the rest is forwarded to intel_unmap_sg() */
+	intel_unmap_sg(dev, sglist, nents, dir);
+}
+EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs);
+
+int
+vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;	/* unconditional: dev and dma_addr are never examined */
+}
+EXPORT_SYMBOL_GPL(vtd_dma_mapping_error);
diff --git a/arch/ia64/dig/machvec_vtd.c b/arch/ia64/dig/machvec_vtd.c
new file mode 100644
index 00000000000..7cd3eb471ca
--- /dev/null
+++ b/arch/ia64/dig/machvec_vtd.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		dig_vtd
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_dig_vtd.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4956be40d7b..d98f0f4ff83 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2070,14 +2070,13 @@ sba_init(void)
 	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
 		return 0;
 
-#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \
-        defined(CONFIG_PROC_FS)
+#if defined(CONFIG_IA64_GENERIC)
 	/* If we are booting a kdump kernel, the sba_iommu will
 	 * cause devices that were not shutdown properly to MCA
 	 * as soon as they are turned back on.  Our only option for
 	 * a successful kdump kernel boot is to use the swiotlb.
 	 */
-	if (elfcorehdr_addr < ELFCORE_ADDR_MAX) {
+	if (is_kdump_kernel()) {
 		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
 			panic("Unable to initialize software I/O TLB:"
 				  " Try machvec=dig boot option");
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index 53505bb0477..a8cf1995885 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -108,6 +108,11 @@ GLOBAL_ENTRY(ia32_trace_syscall)
 	;;
 	st8 [r2]=r3				// initialize return code to -ENOSYS
 	br.call.sptk.few rp=syscall_trace_enter	// give parent a chance to catch syscall args
+	cmp.lt p6,p0=r8,r0			// check tracehook
+	adds r2=IA64_PT_REGS_R8_OFFSET+16,sp	// r2 = &pt_regs.r8
+	;;
+(p6)	st8.spill [r2]=r8			// store return value in slot for r8
+(p6)	br.spnt.few .ret4
 .ret2:	// Need to reload arguments (they may be changed by the tracing process)
 	adds r2=IA64_PT_REGS_R1_OFFSET+16,sp	// r2 = &pt_regs.r1
 	adds r3=IA64_PT_REGS_R13_OFFSET+16,sp	// r3 = &pt_regs.r13
@@ -199,10 +204,10 @@ ia32_syscall_table:
 	data8 sys_setuid	/* 16-bit version */
 	data8 sys_getuid	/* 16-bit version */
 	data8 compat_sys_stime    /* 25 */
-	data8 sys32_ptrace
+	data8 compat_sys_ptrace
 	data8 sys32_alarm
 	data8 sys_ni_syscall
-	data8 sys32_pause
+	data8 sys_pause
 	data8 compat_sys_utime	  /* 30 */
 	data8 sys_ni_syscall	  /* old stty syscall holder */
 	data8 sys_ni_syscall	  /* old gtty syscall holder */
@@ -215,7 +220,7 @@ ia32_syscall_table:
 	data8 sys_mkdir
 	data8 sys_rmdir		  /* 40 */
 	data8 sys_dup
-	data8 sys32_pipe
+	data8 sys_pipe
 	data8 compat_sys_times
 	data8 sys_ni_syscall	  /* old prof syscall holder */
 	data8 sys32_brk		  /* 45 */
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index f4430bb4bbd..5e92ae00bdb 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1098,21 +1098,6 @@ sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
 	return ret;
 }
 
-asmlinkage long
-sys32_pipe (int __user *fd)
-{
-	int retval;
-	int fds[2];
-
-	retval = do_pipe_flags(fds, 0);
-	if (retval)
-		goto out;
-	if (copy_to_user(fd, fds, sizeof(fds)))
-		retval = -EFAULT;
-  out:
-	return retval;
-}
-
 asmlinkage unsigned long
 sys32_alarm (unsigned int seconds)
 {
@@ -1209,25 +1194,6 @@ sys32_waitpid (int pid, unsigned int *stat_addr, int options)
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 
-static unsigned int
-ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val)
-{
-	size_t copied;
-	unsigned int ret;
-
-	copied = access_process_vm(child, addr, val, sizeof(*val), 0);
-	return (copied != sizeof(ret)) ? -EIO : 0;
-}
-
-static unsigned int
-ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val)
-{
-
-	if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val))
-		return -EIO;
-	return 0;
-}
-
 /*
  *  The order in which registers are stored in the ptrace regs structure
  */
@@ -1525,49 +1491,15 @@ restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __u
 	return 0;
 }
 
-asmlinkage long
-sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+	compat_ulong_t caddr, compat_ulong_t cdata)
 {
-	struct task_struct *child;
-	unsigned int value, tmp;
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	unsigned int tmp;
 	long i, ret;
 
-	lock_kernel();
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out;
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
-		goto out;
-	}
-
-	if (request == PTRACE_ATTACH) {
-		ret = sys_ptrace(request, pid, addr, data);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
-	      case PTRACE_PEEKTEXT:
-	      case PTRACE_PEEKDATA:	/* read word at location addr */
-		ret = ia32_peek(child, addr, &value);
-		if (ret == 0)
-			ret = put_user(value, (unsigned int __user *) compat_ptr(data));
-		else
-			ret = -EIO;
-		goto out_tsk;
-
-	      case PTRACE_POKETEXT:
-	      case PTRACE_POKEDATA:	/* write the word at location addr */
-		ret = ia32_poke(child, addr, data);
-		goto out_tsk;
-
 	      case PTRACE_PEEKUSR:	/* read word at addr in USER area */
 		ret = -EIO;
 		if ((addr & 3) || addr > 17*sizeof(int))
@@ -1632,27 +1564,9 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
 					    compat_ptr(data));
 		break;
 
-	      case PTRACE_GETEVENTMSG:   
-		ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data));
-		break;
-
-	      case PTRACE_SYSCALL:	/* continue, stop after next syscall */
-	      case PTRACE_CONT:		/* restart after signal. */
-	      case PTRACE_KILL:
-	      case PTRACE_SINGLESTEP:	/* execute chile for one instruction */
-	      case PTRACE_DETACH:	/* detach a process */
-		ret = sys_ptrace(request, pid, addr, data);
-		break;
-
 	      default:
-		ret = ptrace_request(child, request, addr, data);
-		break;
-
+		return compat_ptrace_request(child, request, caddr, cdata);
 	}
-  out_tsk:
-	put_task_struct(child);
-  out:
-	unlock_kernel();
 	return ret;
 }
 
@@ -1704,14 +1618,6 @@ out:
 }
 
 asmlinkage int
-sys32_pause (void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
 sys32_msync (unsigned int start, unsigned int len, int flags)
 {
 	unsigned int addr;
diff --git a/arch/ia64/include/asm/break.h b/arch/ia64/include/asm/break.h
index f0340203989..e90c40ec9ed 100644
--- a/arch/ia64/include/asm/break.h
+++ b/arch/ia64/include/asm/break.h
@@ -20,4 +20,13 @@
  */
 #define __IA64_BREAK_SYSCALL		0x100000
 
+/*
+ * Xen specific break numbers:
+ */
+#define __IA64_XEN_HYPERCALL		0x1000
+/* [__IA64_XEN_HYPERPRIVOP_START, __IA64_XEN_HYPERPRIVOP_MAX] is used
+   for xen hyperprivops */
+#define __IA64_XEN_HYPERPRIVOP_START	0x1
+#define __IA64_XEN_HYPERPRIVOP_MAX	0x1a
+
 #endif /* _ASM_IA64_BREAK_H */
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
index afcfbda76e2..c8ce2719fee 100644
--- a/arch/ia64/include/asm/cacheflush.h
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -34,6 +34,8 @@ do {						\
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 extern void flush_icache_range (unsigned long start, unsigned long end);
+extern void clflush_cache_range(void *addr, int size);
+
 
 #define flush_icache_user_range(vma, page, user_addr, len)					\
 do {												\
diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
index 3db6daf7f25..41ab85d66f3 100644
--- a/arch/ia64/include/asm/device.h
+++ b/arch/ia64/include/asm/device.h
@@ -10,6 +10,9 @@ struct dev_archdata {
 #ifdef CONFIG_ACPI
 	void	*acpi_handle;
 #endif
+#ifdef CONFIG_DMAR
+	void *iommu; /* hook for IOMMU specific extension */
+#endif
 };
 
 #endif /* _ASM_IA64_DEVICE_H */
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 06ff1ba2146..bbab7e2b0fc 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -7,6 +7,49 @@
  */
 #include <asm/machvec.h>
 #include <linux/scatterlist.h>
+#include <asm/swiotlb.h>
+
+struct dma_mapping_ops {
+	int             (*mapping_error)(struct device *dev,
+					 dma_addr_t dma_addr);
+	void*           (*alloc_coherent)(struct device *dev, size_t size,
+				dma_addr_t *dma_handle, gfp_t gfp);
+	void            (*free_coherent)(struct device *dev, size_t size,
+				void *vaddr, dma_addr_t dma_handle);
+	dma_addr_t      (*map_single)(struct device *hwdev, unsigned long ptr,
+				size_t size, int direction);
+	void            (*unmap_single)(struct device *dev, dma_addr_t addr,
+				size_t size, int direction);
+	void            (*sync_single_for_cpu)(struct device *hwdev,
+				dma_addr_t dma_handle, size_t size,
+				int direction);
+	void            (*sync_single_for_device)(struct device *hwdev,
+				dma_addr_t dma_handle, size_t size,
+				int direction);
+	void            (*sync_single_range_for_cpu)(struct device *hwdev,
+				dma_addr_t dma_handle, unsigned long offset,
+				size_t size, int direction);
+	void            (*sync_single_range_for_device)(struct device *hwdev,
+				dma_addr_t dma_handle, unsigned long offset,
+				size_t size, int direction);
+	void            (*sync_sg_for_cpu)(struct device *hwdev,
+				struct scatterlist *sg, int nelems,
+				int direction);
+	void            (*sync_sg_for_device)(struct device *hwdev,
+				struct scatterlist *sg, int nelems,
+				int direction);
+	int             (*map_sg)(struct device *hwdev, struct scatterlist *sg,
+				int nents, int direction);
+	void            (*unmap_sg)(struct device *hwdev,
+				struct scatterlist *sg, int nents,
+				int direction);
+	int             (*dma_supported_op)(struct device *hwdev, u64 mask);
+	int		is_phys;
+};
+
+extern struct dma_mapping_ops *dma_ops;
+extern struct ia64_machine_vector ia64_mv;
+extern void set_iommu_machvec(void);
 
 #define dma_alloc_coherent(dev, size, handle, gfp)	\
 	platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA)
@@ -96,4 +139,11 @@ dma_cache_sync (struct device *dev, void *vaddr, size_t size,
 
 #define dma_is_consistent(d, h)	(1)	/* all we do is coherent memory... */
 
+static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
+{
+	return dma_ops;
+}
+
+
+
 #endif /* _ASM_IA64_DMA_MAPPING_H */
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
new file mode 100644
index 00000000000..5fb2bb93de3
--- /dev/null
+++ b/arch/ia64/include/asm/iommu.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_IA64_IOMMU_H
+#define _ASM_IA64_IOMMU_H 1
+
+#define cpu_has_x2apic 0
+/* 10 seconds */
+#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+
+extern void pci_iommu_shutdown(void);
+extern void no_iommu_init(void);
+extern int force_iommu, no_iommu;
+extern int iommu_detected;
+extern void iommu_dma_init(void);
+extern void machvec_init(const char *name);
+extern int forbid_dac;
+
+#endif
diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h
index aefcdfee7f2..39e65f6639f 100644
--- a/arch/ia64/include/asm/kregs.h
+++ b/arch/ia64/include/asm/kregs.h
@@ -32,7 +32,7 @@
 #define IA64_TR_CURRENT_STACK	1	/* dtr1: maps kernel's memory- & register-stacks */
 
 #define IA64_TR_ALLOC_BASE	2 	/* itr&dtr: Base of dynamic TR resource*/
-#define IA64_TR_ALLOC_MAX	32 	/* Max number for dynamic use*/
+#define IA64_TR_ALLOC_MAX	64 	/* Max number for dynamic use*/
 
 /* Processor status register bits: */
 #define IA64_PSR_BE_BIT		1
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 2b850ccafef..1ea28bcee33 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -120,6 +120,8 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
 #  include <asm/machvec_hpsim.h>
 # elif defined (CONFIG_IA64_DIG)
 #  include <asm/machvec_dig.h>
+# elif defined(CONFIG_IA64_DIG_VTD)
+#  include <asm/machvec_dig_vtd.h>
 # elif defined (CONFIG_IA64_HP_ZX1)
 #  include <asm/machvec_hpzx1.h>
 # elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
@@ -128,6 +130,8 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
 #  include <asm/machvec_sn2.h>
 # elif defined (CONFIG_IA64_SGI_UV)
 #  include <asm/machvec_uv.h>
+# elif defined (CONFIG_IA64_XEN_GUEST)
+#  include <asm/machvec_xen.h>
 # elif defined (CONFIG_IA64_GENERIC)
 
 # ifdef MACHVEC_PLATFORM_HEADER
diff --git a/arch/ia64/include/asm/machvec_dig_vtd.h b/arch/ia64/include/asm/machvec_dig_vtd.h
new file mode 100644
index 00000000000..3400b561e71
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_dig_vtd.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h
+#define _ASM_IA64_MACHVEC_DIG_VTD_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_dma_alloc_coherent	vtd_alloc_coherent;
+extern ia64_mv_dma_free_coherent	vtd_free_coherent;
+extern ia64_mv_dma_map_single_attrs	vtd_map_single_attrs;
+extern ia64_mv_dma_unmap_single_attrs	vtd_unmap_single_attrs;
+extern ia64_mv_dma_map_sg_attrs		vtd_map_sg_attrs;
+extern ia64_mv_dma_unmap_sg_attrs	vtd_unmap_sg_attrs;
+extern ia64_mv_dma_supported		iommu_dma_supported;
+extern ia64_mv_dma_mapping_error	vtd_dma_mapping_error;
+extern ia64_mv_dma_init			pci_iommu_alloc;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"dig_vtd"
+#define platform_setup				dig_setup
+#define platform_dma_init			pci_iommu_alloc
+#define platform_dma_alloc_coherent		vtd_alloc_coherent
+#define platform_dma_free_coherent		vtd_free_coherent
+#define platform_dma_map_single_attrs		vtd_map_single_attrs
+#define platform_dma_unmap_single_attrs		vtd_unmap_single_attrs
+#define platform_dma_map_sg_attrs		vtd_map_sg_attrs
+#define platform_dma_unmap_sg_attrs		vtd_unmap_sg_attrs
+#define platform_dma_sync_single_for_cpu	machvec_dma_sync_single
+#define platform_dma_sync_sg_for_cpu		machvec_dma_sync_sg
+#define platform_dma_sync_single_for_device	machvec_dma_sync_single
+#define platform_dma_sync_sg_for_device		machvec_dma_sync_sg
+#define platform_dma_supported			iommu_dma_supported
+#define platform_dma_mapping_error		vtd_dma_mapping_error
+
+#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
index 7f21249fba3..ef964b28684 100644
--- a/arch/ia64/include/asm/machvec_init.h
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -1,3 +1,4 @@
+#include <asm/iommu.h>
 #include <asm/machvec.h>
 
 extern ia64_mv_send_ipi_t ia64_send_ipi;
diff --git a/arch/ia64/include/asm/machvec_xen.h b/arch/ia64/include/asm/machvec_xen.h
new file mode 100644
index 00000000000..55f9228056c
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_xen.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_MACHVEC_XEN_h
+#define _ASM_IA64_MACHVEC_XEN_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_cpu_init_t		xen_cpu_init;
+extern ia64_mv_irq_init_t		xen_irq_init;
+extern ia64_mv_send_ipi_t		xen_platform_send_ipi;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"xen"
+#define platform_setup				dig_setup
+#define platform_cpu_init			xen_cpu_init
+#define platform_irq_init			xen_irq_init
+#define platform_send_ipi			xen_platform_send_ipi
+
+#endif /* _ASM_IA64_MACHVEC_XEN_h */
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index 7245a578159..6bc96ee5432 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -18,10 +18,11 @@
  * 	- crash dumping code reserved region
  * 	- Kernel memory map built from EFI memory map
  * 	- ELF core header
+ *	- xen start info if CONFIG_XEN
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 8
+#define IA64_MAX_RSVD_REGIONS 9
 
 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
index c8efbf7b849..0a1026cca4f 100644
--- a/arch/ia64/include/asm/native/inst.h
+++ b/arch/ia64/include/asm/native/inst.h
@@ -36,8 +36,13 @@
 	;;					\
 	movl clob = PARAVIRT_POISON;		\
 	;;
+# define CLOBBER_PRED(pred_clob)		\
+	;;					\
+	cmp.eq pred_clob, p0 = r0, r0		\
+	;;
 #else
-# define CLOBBER(clob)		/* nothing */
+# define CLOBBER(clob)			/* nothing */
+# define CLOBBER_PRED(pred_clob)	/* nothing */
 #endif
 
 #define MOV_FROM_IFA(reg)	\
@@ -136,7 +141,8 @@
 
 #define SSM_PSR_I(pred, pred_clob, clob)	\
 (pred)	ssm psr.i				\
-	CLOBBER(clob)
+	CLOBBER(clob)				\
+	CLOBBER_PRED(pred_clob)
 
 #define RSM_PSR_I(pred, clob0, clob1)	\
 (pred)	rsm psr.i			\
diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
new file mode 100644
index 00000000000..b8e6eb1090d
--- /dev/null
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -0,0 +1,263 @@
+#ifndef _ASM_NATIVE_PVCHK_INST_H
+#define _ASM_NATIVE_PVCHK_INST_H
+
+/******************************************************************************
+ * arch/ia64/include/asm/native/pvchk_inst.h
+ * Checker for paravirtualizations of privileged operations.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *      Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/**********************************************
+ * Instructions paravirtualized for correctness
+ **********************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result.
+ */
+
+#define fc	.error "fc should not be used directly."
+#define thash	.error "thash should not be used directly."
+
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code).
+ */
+#define ttag	.error "ttag should not be used directly."
+#define cover	.error "cover should not be used directly."
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0.
+ */
+#define cpuid	.error "cpuid should not be used directly."
+#define pmd	.error "pmd should not be used directly."
+
+/*
+ * mov ar.eflag =
+ * mov = ar.eflag
+ */
+
+/**********************************************
+ * Instructions paravirtualized for performance
+ **********************************************/
+/*
+ * Those instructions include '.' which can't be handled by cpp,
+ * or can't be handled by cpp easily.
+ * They are handled by sed instead of cpp.
+ */
+
+/* for .S
+ * itc.i
+ * itc.d
+ *
+ * bsw.0
+ * bsw.1
+ *
+ * ssm psr.ic | PSR_DEFAULT_BITS
+ * ssm psr.ic
+ * rsm psr.ic
+ * ssm psr.i
+ * rsm psr.i
+ * rsm psr.i | psr.ic
+ * rsm psr.dt
+ * ssm psr.dt
+ *
+ * mov = cr.ifa
+ * mov = cr.itir
+ * mov = cr.isr
+ * mov = cr.iha
+ * mov = cr.ipsr
+ * mov = cr.iim
+ * mov = cr.iip
+ * mov = cr.ivr
+ * mov = psr
+ *
+ * mov cr.ifa =
+ * mov cr.itir =
+ * mov cr.iha =
+ * mov cr.ipsr =
+ * mov cr.ifs =
+ * mov cr.iip =
+ * mov cr.kr =
+ */
+
+/* for intrinsics
+ * ssm psr.i
+ * rsm psr.i
+ * mov = psr
+ * mov = ivr
+ * mov = tpr
+ * mov cr.itm =
+ * mov eoi =
+ * mov rr[] =
+ * mov = rr[]
+ * mov = kr
+ * mov kr =
+ * ptc.ga
+ */
+
+/*************************************************************
+ * define paravirtualized instruction macros as nop to ignore.
+ * and check whether arguments are appropriate.
+ *************************************************************/
+
+/* check whether reg is a regular register */
+.macro is_rreg_in reg
+	.ifc "\reg", "r0"
+		nop 0
+		.exitm
+	.endif
+	;;
+	mov \reg = r0
+	;;
+.endm
+#define IS_RREG_IN(reg)	is_rreg_in reg ;
+
+#define IS_RREG_OUT(reg)			\
+	;;					\
+	mov reg = r0				\
+	;;
+
+#define IS_RREG_CLOB(reg)	IS_RREG_OUT(reg)
+
+/* check whether pred is a predicate register */
+#define IS_PRED_IN(pred)			\
+	;;					\
+	(pred)	nop 0				\
+	;;
+
+#define IS_PRED_OUT(pred)			\
+	;;					\
+	cmp.eq pred, p0 = r0, r0		\
+	;;
+
+#define IS_PRED_CLOB(pred)	IS_PRED_OUT(pred)
+
+
+#define DO_SAVE_MIN(__COVER, SAVE_IFS, EXTRA, WORKAROUND)	\
+	nop 0
+#define MOV_FROM_IFA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ITIR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ISR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IHA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IPSR(pred, reg)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIM(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIP(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IVR(reg, clob)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_FROM_PSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFA(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_ITIR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IHA(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IPSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFS(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IIP(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_KR(kr, reg, clob0, clob1)	\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define ITC_I(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_D(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob)	\
+	IS_PRED_IN(pred_i)			\
+	IS_PRED_IN(pred_d)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define THASH(pred, reg0, reg1, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg0)			\
+	IS_RREG_IN(reg1)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)					\
+	IS_RREG_CLOB(clob1)
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_IC(clob)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_I(pred, pred_clob, clob)	\
+	IS_PRED_IN(pred)			\
+	IS_PRED_CLOB(pred_clob)			\
+	IS_RREG_CLOB(clob)
+#define RSM_PSR_I(pred, clob0, clob1)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_I_IC(clob0, clob1, clob2)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define RSM_PSR_DT				\
+	nop 0
+#define SSM_PSR_DT_AND_SRLZ_I			\
+	nop 0
+#define BSW_0(clob0, clob1, clob2)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define BSW_1(clob0, clob1)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define COVER					\
+	nop 0
+#define RFI					\
+	br.ret.sptk.many rp /* defining nop causes dependency error */
+
+#endif /* _ASM_NATIVE_PVCHK_INST_H */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 660cab04483..2bf3636473f 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -117,7 +117,7 @@ static inline void paravirt_post_smp_prepare_boot_cpu(void)
 struct pv_iosapic_ops {
 	void (*pcat_compat_init)(void);
 
-	struct irq_chip *(*get_irq_chip)(unsigned long trigger);
+	struct irq_chip *(*__get_irq_chip)(unsigned long trigger);
 
 	unsigned int (*__read)(char __iomem *iosapic, unsigned int reg);
 	void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val);
@@ -135,7 +135,7 @@ iosapic_pcat_compat_init(void)
 static inline struct irq_chip*
 iosapic_get_irq_chip(unsigned long trigger)
 {
-	return pv_iosapic_ops.get_irq_chip(trigger);
+	return pv_iosapic_ops.__get_irq_chip(trigger);
 }
 
 static inline unsigned int
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736..1d660d89db0 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 				enum pci_mmap_state mmap_state, int write_combine);
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
-				      struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
-				   struct bin_attribute *bin_attr,
-				   char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
-			       struct bin_attribute *attr,
-			       struct vm_area_struct *vma);
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
 
 #define pci_get_legacy_mem platform_pci_get_legacy_mem
 #define pci_legacy_read platform_pci_legacy_read
@@ -164,4 +156,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 	return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
 }
 
+#ifdef CONFIG_DMAR
+extern void pci_iommu_alloc(void);
+#endif
 #endif /* _ASM_IA64_PCI_H */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index 15f8dcfe6ee..6417c1ecb44 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -240,6 +240,12 @@ struct switch_stack {
  */
 # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
 
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	/* FIXME: should this be bspstore + nr_dirty regs? */
+	return regs->ar_bspstore;
+}
+
 #define regs_return_value(regs) ((regs)->r8)
 
 /* Conserve space in histogram by encoding slot bits in address
@@ -319,6 +325,8 @@ struct switch_stack {
   #define arch_has_block_step()   (1)
   extern void user_enable_block_step(struct task_struct *);
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
 #endif /* !__KERNEL__ */
 
 /* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
new file mode 100644
index 00000000000..44ef9ef8f5b
--- /dev/null
+++ b/arch/ia64/include/asm/pvclock-abi.h
@@ -0,0 +1,48 @@
+/*
+ * Same structures as x86's.
+ * Hopefully asm-x86/pvclock-abi.h will be moved somewhere more generic.
+ * For now, duplicate the definitions here.
+ */
+
+#ifndef _ASM_IA64__PVCLOCK_ABI_H
+#define _ASM_IA64__PVCLOCK_ABI_H
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.  There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
new file mode 100644
index 00000000000..fb79423834d
--- /dev/null
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -0,0 +1,56 @@
+#ifndef ASM_IA64__SWIOTLB_H
+#define ASM_IA64__SWIOTLB_H
+
+#include <linux/dma-mapping.h>
+
+/* SWIOTLB interface */
+
+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
+				     size_t size, int dir);
+extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flags);
+extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+				 size_t size, int dir);
+extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
+					dma_addr_t dev_addr,
+					size_t size, int dir);
+extern void swiotlb_sync_single_for_device(struct device *hwdev,
+					   dma_addr_t dev_addr,
+					   size_t size, int dir);
+extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
+					      dma_addr_t dev_addr,
+					      unsigned long offset,
+					      size_t size, int dir);
+extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
+						 dma_addr_t dev_addr,
+						 unsigned long offset,
+						 size_t size, int dir);
+extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
+				    struct scatterlist *sg, int nelems,
+				    int dir);
+extern void swiotlb_sync_sg_for_device(struct device *hwdev,
+				       struct scatterlist *sg, int nelems,
+				       int dir);
+extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
+			  int nents, int direction);
+extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+			     int nents, int direction);
+extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
+extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle);
+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+extern void swiotlb_init(void);
+
+extern int swiotlb_force;
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+extern void pci_swiotlb_init(void);
+#else
+#define swiotlb 0
+static inline void pci_swiotlb_init(void)
+{
+}
+#endif
+
+#endif /* ASM_IA64__SWIOTLB_H */
diff --git a/arch/ia64/include/asm/sync_bitops.h b/arch/ia64/include/asm/sync_bitops.h
new file mode 100644
index 00000000000..593c12eeb27
--- /dev/null
+++ b/arch/ia64/include/asm/sync_bitops.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_IA64_SYNC_BITOPS_H
+#define _ASM_IA64_SYNC_BITOPS_H
+
+/*
+ * Copyright (C) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * Based on synch_bitops.h which Dan Magenheimer wrote.
+ *
+ * bit operations which provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+static inline void sync_set_bit(int nr, volatile void *addr)
+{
+	set_bit(nr, addr);
+}
+
+static inline void sync_clear_bit(int nr, volatile void *addr)
+{
+	clear_bit(nr, addr);
+}
+
+static inline void sync_change_bit(int nr, volatile void *addr)
+{
+	change_bit(nr, addr);
+}
+
+static inline int sync_test_and_set_bit(int nr, volatile void *addr)
+{
+	return test_and_set_bit(nr, addr);
+}
+
+static inline int sync_test_and_clear_bit(int nr, volatile void *addr)
+{
+	return test_and_clear_bit(nr, addr);
+}
+
+static inline int sync_test_and_change_bit(int nr, volatile void *addr)
+{
+	return test_and_change_bit(nr, addr);
+}
+
+static inline int sync_test_bit(int nr, const volatile void *addr)
+{
+	return test_bit(nr, addr);
+}
+
+#define sync_cmpxchg(ptr, old, new)				\
+	((__typeof__(*(ptr)))cmpxchg_acq((ptr), (old), (new)))
+
+#endif /* _ASM_IA64_SYNC_BITOPS_H */
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
new file mode 100644
index 00000000000..2f758a42f94
--- /dev/null
+++ b/arch/ia64/include/asm/syscall.h
@@ -0,0 +1,163 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Intel Corp.  Shaohua Li <shaohua.li@intel.com>
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	if ((long)regs->cr_ifs < 0) /* Not a syscall */
+		return -1;
+
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs))
+		return regs->r1;
+#endif
+
+	return regs->r15;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs))
+		regs->r8 = regs->r1;
+#endif
+
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs))
+		return regs->r8;
+#endif
+
+	return regs->r10 == -1 ? regs->r8:0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->r8;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs)) {
+		regs->r8 = (long) error ? error : val;
+		return;
+	}
+#endif
+
+	if (error) {
+		/* error < 0, but ia64 uses > 0 return value */
+		regs->r8 = -error;
+		regs->r10 = -1;
+	} else {
+		regs->r8 = val;
+		regs->r10 = 0;
+	}
+}
+
+extern void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw);
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs)) {
+		switch (i + n) {	/* walk down from the last requested arg */
+		case 6:
+			if (!n--) break;
+			args[n] = regs->r13;	/* n is now this arg's slot */
+		case 5:
+			if (!n--) break;
+			args[n] = regs->r15;
+		case 4:
+			if (!n--) break;
+			args[n] = regs->r14;
+		case 3:
+			if (!n--) break;
+			args[n] = regs->r10;
+		case 2:
+			if (!n--) break;
+			args[n] = regs->r9;
+		case 1:
+			if (!n--) break;
+			args[n] = regs->r11;	/* slot 0 ends up holding arg i */
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+
+		return;
+	}
+#endif
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(regs)) {
+		switch (i + n) {	/* walk down from the last requested arg */
+		case 6:
+			if (!n--) break;
+			regs->r13 = args[n];	/* n is now this arg's slot */
+		case 5:
+			if (!n--) break;
+			regs->r15 = args[n];
+		case 4:
+			if (!n--) break;
+			regs->r14 = args[n];
+		case 3:
+			if (!n--) break;
+			regs->r10 = args[n];
+		case 2:
+			if (!n--) break;
+			regs->r9 = args[n];
+		case 1:
+			if (!n--) break;
+			regs->r11 = args[n];	/* slot 0 supplies arg i */
+		case 0:
+			if (!n--) break;
+		}
+
+		return;
+	}
+#endif
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+}
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 7c60fcdd2ef..ae6922626bf 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -87,9 +87,6 @@ struct thread_info {
 #define alloc_task_struct()	((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
 #define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
 
-#define tsk_set_notify_resume(tsk) \
-	set_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME)
-extern void tsk_clear_notify_resume(struct task_struct *tsk);
 #endif /* !__ASSEMBLY */
 
 /*
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
index 05a6baf8a47..4e03cfe74a0 100644
--- a/arch/ia64/include/asm/timex.h
+++ b/arch/ia64/include/asm/timex.h
@@ -39,4 +39,6 @@ get_cycles (void)
 	return ret;
 }
 
+extern void ia64_cpu_local_tick (void);
+
 #endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index d535833aab5..f791576355a 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -337,6 +337,7 @@
 # define __ARCH_WANT_SYS_NICE
 # define __ARCH_WANT_SYS_OLD_GETRLIMIT
 # define __ARCH_WANT_SYS_OLDUMOUNT
+# define __ARCH_WANT_SYS_PAUSE
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
 # define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h
new file mode 100644
index 00000000000..73248781fba
--- /dev/null
+++ b/arch/ia64/include/asm/xen/events.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/events.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef _ASM_IA64_XEN_EVENTS_H
+#define _ASM_IA64_XEN_EVENTS_H
+
+enum ipi_vector {
+	XEN_RESCHEDULE_VECTOR,
+	XEN_IPI_VECTOR,
+	XEN_CMCP_VECTOR,
+	XEN_CPEP_VECTOR,
+
+	XEN_NR_IPIS,
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+	return !(ia64_psr(regs)->i);
+}
+
+/* Run __do_IRQ(irq) inside irq_enter()/irq_exit(); regs set, then restored. */
+static inline void xen_do_IRQ(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *saved_regs = set_irq_regs(regs);
+	irq_enter();
+	__do_IRQ(irq);
+	irq_exit();
+	set_irq_regs(saved_regs);
+}
+#define irq_ctx_init(cpu)	do { } while (0)
+
+#endif /* _ASM_IA64_XEN_EVENTS_H */
diff --git a/arch/ia64/include/asm/xen/grant_table.h b/arch/ia64/include/asm/xen/grant_table.h
new file mode 100644
index 00000000000..2b1fae0e2d1
--- /dev/null
+++ b/arch/ia64/include/asm/xen/grant_table.h
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/grant_table.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_GRANT_TABLE_H
+#define _ASM_IA64_XEN_GRANT_TABLE_H
+
+struct vm_struct *xen_alloc_vm_area(unsigned long size);
+void xen_free_vm_area(struct vm_struct *area);
+
+#endif /* _ASM_IA64_XEN_GRANT_TABLE_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
new file mode 100644
index 00000000000..96fc62366aa
--- /dev/null
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -0,0 +1,265 @@
+/******************************************************************************
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _ASM_IA64_XEN_HYPERCALL_H
+#define _ASM_IA64_XEN_HYPERCALL_H
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/sched.h>
+#include <asm/xen/xcom_hcall.h>
+struct xencomm_handle;
+extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
+				 unsigned long a3, unsigned long a4,
+				 unsigned long a5, unsigned long cmd);
+
+/*
+ * _hypercallN: pass N args as unsigned long (rest 0); result is cast to type.
+ */
+
+#define _hypercall0(type, name)					\
+({								\
+	long __res;						\
+	__res = __hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);\
+	(type)__res;						\
+})
+
+#define _hypercall1(type, name, a1)				\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)(a1),		\
+			     0, 0, 0, 0, __HYPERVISOR_##name);	\
+	(type)__res;						\
+})
+
+#define _hypercall2(type, name, a1, a2)				\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)(a1),		\
+			    (unsigned long)(a2),		\
+			    0, 0, 0, __HYPERVISOR_##name);	\
+	(type)__res;						\
+})
+
+#define _hypercall3(type, name, a1, a2, a3)			\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)(a1),		\
+			    (unsigned long)(a2),		\
+			    (unsigned long)(a3),		\
+			    0, 0, __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)			\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)(a1),		\
+			    (unsigned long)(a2),		\
+			    (unsigned long)(a3),		\
+			    (unsigned long)(a4),		\
+			    0, __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)(a1),		\
+			    (unsigned long)(a2),		\
+			    (unsigned long)(a3),		\
+			    (unsigned long)(a4),		\
+			    (unsigned long)(a5),		\
+			    __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+
+static inline int
+xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, sched_op_new, cmd, arg);
+}
+
+static inline long
+HYPERVISOR_set_timer_op(u64 timeout)
+{
+	unsigned long timeout_hi = (unsigned long)(timeout >> 32);
+	unsigned long timeout_lo = (unsigned long)timeout;
+	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
+}
+
+static inline int
+xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list,
+				 int nr_calls)
+{
+	return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int
+xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, event_channel_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_console_io(int cmd, int count,
+				  struct xencomm_handle *str)
+{
+	return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int
+xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, physdev_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
+				      struct xencomm_handle *uop,
+				      unsigned int count)
+{
+	return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+
+extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg);
+
+static inline int
+xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline long
+xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	return _hypercall3(long, vcpu_op, cmd, cpu, arg);
+}
+
+/* PHYSDEVOP_eoi uses the ia64_fast_eoi hypercall; all else goes via xencomm. */
+static inline int
+HYPERVISOR_physdev_op(int cmd, void *arg)
+{
+	if (cmd == PHYSDEVOP_eoi) {
+		struct physdev_eoi *eoi = arg;
+
+		return _hypercall1(int, ia64_fast_eoi, eoi->irq);
+	}
+	return xencomm_hypercall_physdev_op(cmd, arg);
+}
+
+static inline long
+xencomm_arch_hypercall_opt_feature(struct xencomm_handle *arg)
+{
+	return _hypercall1(long, opt_feature, arg);
+}
+
+/* for balloon driver */
+#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+
+/* Use xencomm to do hypercalls.  */
+#define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op
+#define HYPERVISOR_callback_op xencomm_hypercall_callback_op
+#define HYPERVISOR_multicall xencomm_hypercall_multicall
+#define HYPERVISOR_xen_version xencomm_hypercall_xen_version
+#define HYPERVISOR_console_io xencomm_hypercall_console_io
+#define HYPERVISOR_memory_op xencomm_hypercall_memory_op
+#define HYPERVISOR_suspend xencomm_hypercall_suspend
+#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op
+#define HYPERVISOR_opt_feature xencomm_hypercall_opt_feature
+
+/* to compile gnttab_copy_grant_page() in drivers/xen/core/gnttab.c */
+#define HYPERVISOR_mmu_update(req, count, success_count, domid) ({ BUG(); 0; })
+
+/*
+ * Issue SCHEDOP_shutdown through HYPERVISOR_sched_op(), passing a
+ * struct sched_shutdown that carries the caller's reason code, and
+ * return that call's result unchanged.
+ */
+static inline int
+HYPERVISOR_shutdown(unsigned int reason)
+{
+	struct sched_shutdown shutdown_args = { .reason = reason };
+
+	return HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown_args);
+}
+
+/* for netfront.c, netback.c */
+#define MULTI_UVMFLAGS_INDEX 0 /* XXX any value */
+
+static inline void
+MULTI_update_va_mapping(
+	struct multicall_entry *mcl, unsigned long va,
+	pte_t new_val, unsigned long flags)
+{
+	mcl->op = __HYPERVISOR_update_va_mapping;
+	mcl->result = 0;
+}
+
+static inline void
+MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
+	void *uop, unsigned int count)
+{
+	mcl->op = __HYPERVISOR_grant_table_op;
+	mcl->args[0] = cmd;
+	mcl->args[1] = (unsigned long)uop;
+	mcl->args[2] = count;
+}
+
+static inline void
+MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
+		 int count, int *success_count, domid_t domid)
+{
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)req;
+	mcl->args[1] = count;
+	mcl->args[2] = (unsigned long)success_count;
+	mcl->args[3] = domid;
+}
+
+#endif /* _ASM_IA64_XEN_HYPERCALL_H */
diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h
new file mode 100644
index 00000000000..7a804e80fc6
--- /dev/null
+++ b/arch/ia64/include/asm/xen/hypervisor.h
@@ -0,0 +1,89 @@
+/******************************************************************************
+ * hypervisor.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _ASM_IA64_XEN_HYPERVISOR_H
+#define _ASM_IA64_XEN_HYPERVISOR_H
+
+#ifdef CONFIG_XEN
+
+#include <linux/init.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>	/* to compile feature.c */
+#include <xen/features.h>		/* to compile xen-netfront.c */
+#include <asm/xen/hypercall.h>
+
+/* xen_domain_type is set before executing any C code by early_xen_setup */
+enum xen_domain_type {
+	XEN_NATIVE,
+	XEN_PV_DOMAIN,
+	XEN_HVM_DOMAIN,
+};
+
+extern enum xen_domain_type xen_domain_type;
+
+#define xen_domain()		(xen_domain_type != XEN_NATIVE)
+#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#define xen_initial_domain()	(xen_pv_domain() && \
+				 (xen_start_info->flags & SIF_INITDOMAIN))
+#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
+
+/* deprecated. remove this */
+#define is_running_on_xen()	(xen_domain_type == XEN_PV_DOMAIN)
+
+extern struct shared_info *HYPERVISOR_shared_info;
+extern struct start_info *xen_start_info;
+
+void __init xen_setup_vcpu_info_placement(void);
+void force_evtchn_callback(void);
+
+/* for drivers/xen/balloon/balloon.c */
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p, _n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p, _n) ((void)0)
+#endif
+
+/* For setup_arch() in arch/ia64/kernel/setup.c */
+void xen_ia64_enable_opt_feature(void);
+
+#else /* CONFIG_XEN */
+
+#define xen_domain()		(0)
+#define xen_pv_domain()		(0)
+#define xen_initial_domain()	(0)
+#define xen_hvm_domain()	(0)
+#define is_running_on_xen()	(0)	/* deprecated. remove this */
+#endif
+
+#define is_initial_xendomain()	(0)	/* deprecated. remove this */
+
+#endif /* _ASM_IA64_XEN_HYPERVISOR_H */
diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
new file mode 100644
index 00000000000..19c2ae1d878
--- /dev/null
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -0,0 +1,458 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/xen/privop.h>
+
+#define ia64_ivt				xen_ivt
+#define DO_SAVE_MIN				XEN_DO_SAVE_MIN
+
+#define __paravirt_switch_to			xen_switch_to
+#define __paravirt_leave_syscall		xen_leave_syscall
+#define __paravirt_work_processed_syscall	xen_work_processed_syscall
+#define __paravirt_leave_kernel			xen_leave_kernel
+#define __paravirt_pending_syscall_end		xen_work_pending_syscall_end
+#define __paravirt_work_processed_syscall_target \
+						xen_work_processed_syscall
+
+#define MOV_FROM_IFA(reg)	\
+	movl reg = XSI_IFA;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_ITIR(reg)	\
+	movl reg = XSI_ITIR;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_ISR(reg)	\
+	movl reg = XSI_ISR;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IHA(reg)	\
+	movl reg = XSI_IHA;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IPSR(pred, reg)	\
+(pred)	movl reg = XSI_IPSR;		\
+	;;				\
+(pred)	ld8 reg = [reg]
+
+#define MOV_FROM_IIM(reg)	\
+	movl reg = XSI_IIM;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IIP(reg)	\
+	movl reg = XSI_IIP;	\
+	;;			\
+	ld8 reg = [reg]
+
+.macro __MOV_FROM_IVR reg, clob
+	.ifc "\reg", "r8"
+		XEN_HYPER_GET_IVR
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		XEN_HYPER_GET_IVR
+		;;
+		mov \reg = r8
+		.exitm
+	.endif
+
+	mov \clob = r8
+	;;
+	XEN_HYPER_GET_IVR
+	;;
+	mov \reg = r8
+	;;
+	mov r8 = \clob
+.endm
+#define MOV_FROM_IVR(reg, clob)	__MOV_FROM_IVR reg, clob
+
+.macro __MOV_FROM_PSR pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_GET_PSR;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	XEN_HYPER_GET_PSR
+		;;
+		(\pred)	mov \reg = r8
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	XEN_HYPER_GET_PSR
+	;;
+	(\pred)	mov \reg = r8
+	(\pred)	mov r8 = \clob
+.endm
+#define MOV_FROM_PSR(pred, reg, clob)	__MOV_FROM_PSR pred, reg, clob
+
+
+#define MOV_TO_IFA(reg, clob)	/* store reg at XSI_IFA; clob holds the address */ \
+	movl clob = XSI_IFA;	\
+	;;			\
+	st8 [clob] = reg
+
+#define MOV_TO_ITIR(pred, reg, clob)	\
+(pred)	movl clob = XSI_ITIR;		\
+	;;				\
+(pred)	st8 [clob] = reg
+
+#define MOV_TO_IHA(pred, reg, clob)	\
+(pred)	movl clob = XSI_IHA;		\
+	;;				\
+(pred)	st8 [clob] = reg
+
+#define MOV_TO_IPSR(pred, reg, clob)	\
+(pred)	movl clob = XSI_IPSR;		\
+	;;				\
+(pred)	st8 [clob] = reg;		\
+	;;
+
+#define MOV_TO_IFS(pred, reg, clob)	\
+(pred)	movl clob = XSI_IFS;		\
+	;;				\
+(pred)	st8 [clob] = reg;		\
+	;;
+
+#define MOV_TO_IIP(reg, clob)	\
+	movl clob = XSI_IIP;	\
+	;;			\
+	st8 [clob] = reg
+
+.macro ____MOV_TO_KR kr, reg, clob0, clob1
+	.ifc "\clob0", "r9"
+		.error "clob0 \clob0 must not be r9"
+	.endif
+	.ifc "\clob1", "r8"
+		.error "clob1 \clob1 must not be r8"
+	.endif
+
+	.ifnc "\reg", "r9"
+		.ifnc "\clob1", "r9"
+			mov \clob1 = r9
+		.endif
+		mov r9 = \reg
+	.endif
+	.ifnc "\clob0", "r8"
+		mov \clob0 = r8
+	.endif
+	mov r8 = \kr
+	;;
+	XEN_HYPER_SET_KR
+
+	.ifnc "\reg", "r9"
+		.ifnc "\clob1", "r9"
+			mov r9 = \clob1
+		.endif
+	.endif
+	.ifnc "\clob0", "r8"
+		mov r8 = \clob0
+	.endif
+.endm
+
+.macro __MOV_TO_KR kr, reg, clob0, clob1
+	.ifc "\clob0", "r9"
+		____MOV_TO_KR \kr, \reg, \clob1, \clob0
+		.exitm
+	.endif
+	.ifc "\clob1", "r8"
+		____MOV_TO_KR \kr, \reg, \clob1, \clob0
+		.exitm
+	.endif
+
+	____MOV_TO_KR \kr, \reg, \clob0, \clob1
+.endm
+
+#define MOV_TO_KR(kr, reg, clob0, clob1) \
+	__MOV_TO_KR IA64_KR_ ## kr, reg, clob0, clob1
+
+
+.macro __ITC_I pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_ITC_I
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg
+		;;
+		(\pred)	XEN_HYPER_ITC_I
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg
+	;;
+	(\pred)	XEN_HYPER_ITC_I
+	;;
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define ITC_I(pred, reg, clob)	__ITC_I pred, reg, clob
+
+.macro __ITC_D pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg
+		;;
+		(\pred)	XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg
+	;;
+	(\pred)	XEN_HYPER_ITC_D
+	;;
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define ITC_D(pred, reg, clob)	__ITC_D pred, reg, clob
+
+.macro __ITC_I_AND_D pred_i, pred_d, reg, clob
+	.ifc "\reg", "r8"
+		(\pred_i)XEN_HYPER_ITC_I
+		;;
+		(\pred_d)XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		mov r8 = \reg
+		;;
+		(\pred_i)XEN_HYPER_ITC_I
+		;;
+		(\pred_d)XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+
+	mov \clob = r8
+	mov r8 = \reg
+	;;
+	(\pred_i)XEN_HYPER_ITC_I
+	;;
+	(\pred_d)XEN_HYPER_ITC_D
+	;;
+	mov r8 = \clob
+	;;
+.endm
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \
+	__ITC_I_AND_D pred_i, pred_d, reg, clob
+
+.macro __THASH pred, reg0, reg1, clob	/* reg0 = XEN_HYPER_THASH(reg1), via r8 */
+	.ifc "\reg0", "r8"
+		(\pred)	mov r8 = \reg1
+		(\pred)	XEN_HYPER_THASH
+		.exitm
+	.endif
+	.ifc "\reg1", "r8"
+		(\pred)	XEN_HYPER_THASH
+		;;
+		(\pred)	mov \reg0 = r8
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg1
+		(\pred)	XEN_HYPER_THASH
+		;;
+		(\pred)	mov \reg0 = r8
+		;;
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg1
+	(\pred)	XEN_HYPER_THASH
+	;;
+	(\pred)	mov \reg0 = r8
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define THASH(pred, reg0, reg1, clob) __THASH pred, reg0, reg1, clob
+
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
+	mov clob0 = 1;						\
+	movl clob1 = XSI_PSR_IC;				\
+	;;							\
+	st4 [clob1] = clob0					\
+	;;
+
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	;;					\
+	srlz.d;					\
+	mov clob1 = 1;				\
+	movl clob0 = XSI_PSR_IC;		\
+	;;					\
+	st4 [clob0] = clob1
+
+#define RSM_PSR_IC(clob)	\
+	movl clob = XSI_PSR_IC;	\
+	;;			\
+	st4 [clob] = r0;	\
+	;;
+
+/* pred_clob will be clobbered (it is the target of the cmp.ne.unc below) */
+#define MASK_TO_PEND_OFS    (-1)
+#define SSM_PSR_I(pred, pred_clob, clob)				\
+(pred)	movl clob = XSI_PSR_I_ADDR					\
+	;;								\
+(pred)	ld8 clob = [clob]						\
+	;;								\
+	/* if (pred) vpsr.i = 1 */					\
+	/* if (pred) (vcpu->vcpu_info->evtchn_upcall_mask)=0 */		\
+(pred)	st1 [clob] = r0, MASK_TO_PEND_OFS				\
+	;;								\
+	/* if (vcpu->vcpu_info->evtchn_upcall_pending) */		\
+(pred)	ld1 clob = [clob]						\
+	;;								\
+(pred)	cmp.ne.unc pred_clob, p0 = clob, r0				\
+	;;								\
+(pred_clob)XEN_HYPER_SSM_I	/* do a real ssm psr.i */
+
+#define RSM_PSR_I(pred, clob0, clob1)	\
+	movl clob0 = XSI_PSR_I_ADDR;	\
+	mov clob1 = 1;			\
+	;;				\
+	ld8 clob0 = [clob0];		\
+	;;				\
+(pred)	st1 [clob0] = clob1
+
+#define RSM_PSR_I_IC(clob0, clob1, clob2)		\
+	movl clob0 = XSI_PSR_I_ADDR;			\
+	movl clob1 = XSI_PSR_IC;			\
+	;;						\
+	ld8 clob0 = [clob0];				\
+	mov clob2 = 1;					\
+	;;						\
+	/* note: clears both vpsr.i and vpsr.ic! */	\
+	st1 [clob0] = clob2;				\
+	st4 [clob1] = r0;				\
+	;;
+
+#define RSM_PSR_DT		\
+	XEN_HYPER_RSM_PSR_DT
+
+#define SSM_PSR_DT_AND_SRLZ_I	\
+	XEN_HYPER_SSM_PSR_DT
+
+#define BSW_0(clob0, clob1, clob2)			\
+	;;						\
+	/* r16-r31 all now hold bank1 values */		\
+	mov clob2 = ar.unat;				\
+	movl clob0 = XSI_BANK1_R16;			\
+	movl clob1 = XSI_BANK1_R16 + 8;			\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r16, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r17, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r18, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r19, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r20, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r21, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r22, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r23, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r24, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r25, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r26, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r27, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r28, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r29, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r30, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r31, 16;		\
+	;;						\
+	mov clob1 = ar.unat;				\
+	movl clob0 = XSI_B1NAT;				\
+	;;						\
+	st8 [clob0] = clob1;				\
+	mov ar.unat = clob2;				\
+	movl clob0 = XSI_BANKNUM;			\
+	;;						\
+	st4 [clob0] = r0
+
+
+	/* FIXME: THIS CODE IS NOT NaT SAFE! */
+#define XEN_BSW_1(clob)			\
+	mov clob = ar.unat;		\
+	movl r30 = XSI_B1NAT;		\
+	;;				\
+	ld8 r30 = [r30];		\
+	mov r31 = 1;			\
+	;;				\
+	mov ar.unat = r30;		\
+	movl r30 = XSI_BANKNUM;		\
+	;;				\
+	st4 [r30] = r31;		\
+	movl r30 = XSI_BANK1_R16;	\
+	movl r31 = XSI_BANK1_R16+8;	\
+	;;				\
+	ld8.fill r16 = [r30], 16;	\
+	ld8.fill r17 = [r31], 16;	\
+	;;				\
+	ld8.fill r18 = [r30], 16;	\
+	ld8.fill r19 = [r31], 16;	\
+	;;				\
+	ld8.fill r20 = [r30], 16;	\
+	ld8.fill r21 = [r31], 16;	\
+	;;				\
+	ld8.fill r22 = [r30], 16;	\
+	ld8.fill r23 = [r31], 16;	\
+	;;				\
+	ld8.fill r24 = [r30], 16;	\
+	ld8.fill r25 = [r31], 16;	\
+	;;				\
+	ld8.fill r26 = [r30], 16;	\
+	ld8.fill r27 = [r31], 16;	\
+	;;				\
+	ld8.fill r28 = [r30], 16;	\
+	ld8.fill r29 = [r31], 16;	\
+	;;				\
+	ld8.fill r30 = [r30];		\
+	ld8.fill r31 = [r31];		\
+	;;				\
+	mov ar.unat = clob
+
+#define BSW_1(clob0, clob1)	XEN_BSW_1(clob1)
+
+
+#define COVER	\
+	XEN_HYPER_COVER
+
+#define RFI			\
+	XEN_HYPER_RFI;		\
+	dv_serialize_data
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
new file mode 100644
index 00000000000..f00fab40854
--- /dev/null
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -0,0 +1,346 @@
+/******************************************************************************
+ * arch-ia64/hypervisor-if.h
+ *
+ * Guest OS interface to IA64 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright by those who contributed. (in alphabetical order)
+ *
+ * Anthony Xu <anthony.xu@intel.com>
+ * Eddie Dong <eddie.dong@intel.com>
+ * Fred Yang <fred.yang@intel.com>
+ * Kevin Tian <kevin.tian@intel.com>
+ * Alex Williamson <alex.williamson@hp.com>
+ * Chris Wright <chrisw@sous-sol.org>
+ * Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+ * Dietmar Hahn <dietmar.hahn@fujitsu-siemens.com>
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ * Isaku Yamahata <yamahata@valinux.co.jp>
+ * Jan Beulich <jbeulich@novell.com>
+ * John Levon <john.levon@sun.com>
+ * Kazuhiro Suzuki <kaz@jp.fujitsu.com>
+ * Keir Fraser <keir.fraser@citrix.com>
+ * Kouya Shimura <kouya@jp.fujitsu.com>
+ * Masaki Kanno <kanno.masaki@jp.fujitsu.com>
+ * Matt Chapman <matthewc@hp.com>
+ * Matthew Chapman <matthewc@hp.com>
+ * Samuel Thibault <samuel.thibault@eu.citrix.com>
+ * Tomonari Horikoshi <t.horikoshi@jp.fujitsu.com>
+ * Tristan Gingold <tgingold@free.fr>
+ * Tsunehisa Doi <Doi.Tsunehisa@jp.fujitsu.com>
+ * Yutaka Ezaki <yutaka.ezaki@jp.fujitsu.com>
+ * Zhang Xin <xing.z.zhang@intel.com>
+ * Zhang xiantao <xiantao.zhang@intel.com>
+ * dan.magenheimer@hp.com
+ * ian.pratt@cl.cam.ac.uk
+ * michael.fetterman@cl.cam.ac.uk
+ */
+
+#ifndef _ASM_IA64_XEN_INTERFACE_H
+#define _ASM_IA64_XEN_INTERFACE_H
+
+#define __DEFINE_GUEST_HANDLE(name, type)	\
+	typedef struct { type *p; } __guest_handle_ ## name
+
+#define DEFINE_GUEST_HANDLE_STRUCT(name)	\
+	__DEFINE_GUEST_HANDLE(name, struct name)
+#define DEFINE_GUEST_HANDLE(name)	__DEFINE_GUEST_HANDLE(name, name)
+#define GUEST_HANDLE(name)		__guest_handle_ ## name
+#define GUEST_HANDLE_64(name)		GUEST_HANDLE(name)
+#define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+__DEFINE_GUEST_HANDLE(uchar, unsigned char);
+__DEFINE_GUEST_HANDLE(uint, unsigned int);
+__DEFINE_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_GUEST_HANDLE(u64, unsigned long);
+DEFINE_GUEST_HANDLE(char);
+DEFINE_GUEST_HANDLE(int);
+DEFINE_GUEST_HANDLE(long);
+DEFINE_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_GUEST_HANDLE(xen_pfn_t);
+#define PRI_xen_pfn	"lx"
+#endif
+
+/* Arch specific VIRQs definition */
+#define VIRQ_ITC	VIRQ_ARCH_0	/* V. Virtual itc timer */
+#define VIRQ_MCA_CMC	VIRQ_ARCH_1	/* MCA cmc interrupt */
+#define VIRQ_MCA_CPE	VIRQ_ARCH_2	/* MCA cpe interrupt */
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+/* keep sizeof(struct shared_page) <= PAGE_SIZE.
+ * this is checked in arch/ia64/xen/hypervisor.c. */
+#define MAX_VIRT_CPUS	64
+
+#ifndef __ASSEMBLY__
+
+#define INVALID_MFN	(~0UL)
+
+union vac {
+	unsigned long value;
+	struct {
+		int a_int:1;
+		int a_from_int_cr:1;
+		int a_to_int_cr:1;
+		int a_from_psr:1;
+		int a_from_cpuid:1;
+		int a_cover:1;
+		int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		int d_vmsw:1;
+		int d_extint:1;
+		int d_ibr_dbr:1;
+		int d_pmc:1;
+		int d_to_pmd:1;
+		int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct mapped_regs {
+	union vac vac;
+	union vdc vdc;
+	unsigned long virt_env_vaddr;
+	unsigned long reserved1[29];
+	unsigned long vhpi;
+	unsigned long reserved2[95];
+	union {
+		unsigned long vgr[16];
+		unsigned long bank1_regs[16];	/* bank1 regs (r16-r31)
+						   when bank0 active */
+	};
+	union {
+		unsigned long vbgr[16];
+		unsigned long bank0_regs[16];	/* bank0 regs (r16-r31)
+						   when bank1 active */
+	};
+	unsigned long vnat;
+	unsigned long vbnat;
+	unsigned long vcpuid[5];
+	unsigned long reserved3[11];
+	unsigned long vpsr;
+	unsigned long vpr;
+	unsigned long reserved4[76];
+	union {
+		unsigned long vcr[128];
+		struct {
+			unsigned long dcr;	/* CR0 */
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;	/* CR8 */
+			unsigned long rsv2[7];
+			unsigned long ipsr;	/* CR16 */
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;	/* CR24 */
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;	/* CR64 */
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;	/* CR72 */
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;	/* CR80 */
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	union {
+		unsigned long reserved5[128];
+		struct {
+			unsigned long precover_ifs;
+			unsigned long unat;	/* not sure if this is needed
+						   until NaT arch is done */
+			int interrupt_collection_enabled; /* virtual psr.ic */
+
+			/* virtual interrupt deliverable flag is
+			 * evtchn_upcall_mask in shared info area now.
+			 * interrupt_mask_addr is the address
+			 * of evtchn_upcall_mask for current vcpu
+			 */
+			unsigned char *interrupt_mask_addr;
+			int pending_interruption;
+			unsigned char vpsr_pp;
+			unsigned char vpsr_dfh;
+			unsigned char hpsr_dfh;
+			unsigned char hpsr_mfh;
+			unsigned long reserved5_1[4];
+			int metaphysical_mode;	/* 1 = use metaphys mapping
+						   0 = use virtual */
+			int banknum;		/* 0 or 1, which virtual
+						   register bank is active */
+			unsigned long rrs[8];	/* region registers */
+			unsigned long krs[8];	/* kernel registers */
+			unsigned long tmp[16];	/* temp registers
+						   (e.g. for hyperprivops) */
+		};
+	};
+};
+
+struct arch_vcpu_info {
+	/* nothing */
+};
+
+/*
+ * This structure is used both for the magic page in the domain's pseudo
+ * physical address space and for the result of XENMEM_machine_memory_map.
+ * In the XENMEM_machine_memory_map result,
+ * xen_memory_map::nr_entries holds the size in bytes, including
+ * struct xen_ia64_memmap_info itself -- not the number of entries.
+ */
+struct xen_ia64_memmap_info {
+	uint64_t efi_memmap_size;	/* size of EFI memory map */
+	uint64_t efi_memdesc_size;	/* size of an EFI memory map
+					 * descriptor */
+	uint32_t efi_memdesc_version;	/* memory descriptor version */
+	void *memdesc[0];		/* array of efi_memory_desc_t */
+};
+
+struct arch_shared_info {
+	/* PFN of the start_info page.	*/
+	unsigned long start_info_pfn;
+
+	/* Interrupt vector for event channel.	*/
+	int evtchn_vector;
+
+	/* PFN of memmap_info page */
+	unsigned int memmap_info_num_pages;	/* currently only = 1 case is
+						   supported. */
+	unsigned long memmap_info_pfn;
+
+	uint64_t pad[31];
+};
+
+struct xen_callback {
+	unsigned long ip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* Size of the shared_info area (this is not related to page size).  */
+#define XSI_SHIFT			14
+#define XSI_SIZE			(1 << XSI_SHIFT)
+/* Log size of mapped_regs area (64 KB - only 4KB is used).  */
+#define XMAPPEDREGS_SHIFT		12
+#define XMAPPEDREGS_SIZE		(1 << XMAPPEDREGS_SHIFT)
+/* Offset of XASI (Xen arch shared info) wrt XSI_BASE.	*/
+#define XMAPPEDREGS_OFS			XSI_SIZE
+
+/* Hyperprivops.  */
+#define HYPERPRIVOP_START		0x1
+#define HYPERPRIVOP_RFI			(HYPERPRIVOP_START + 0x0)
+#define HYPERPRIVOP_RSM_DT		(HYPERPRIVOP_START + 0x1)
+#define HYPERPRIVOP_SSM_DT		(HYPERPRIVOP_START + 0x2)
+#define HYPERPRIVOP_COVER		(HYPERPRIVOP_START + 0x3)
+#define HYPERPRIVOP_ITC_D		(HYPERPRIVOP_START + 0x4)
+#define HYPERPRIVOP_ITC_I		(HYPERPRIVOP_START + 0x5)
+#define HYPERPRIVOP_SSM_I		(HYPERPRIVOP_START + 0x6)
+#define HYPERPRIVOP_GET_IVR		(HYPERPRIVOP_START + 0x7)
+#define HYPERPRIVOP_GET_TPR		(HYPERPRIVOP_START + 0x8)
+#define HYPERPRIVOP_SET_TPR		(HYPERPRIVOP_START + 0x9)
+#define HYPERPRIVOP_EOI			(HYPERPRIVOP_START + 0xa)
+#define HYPERPRIVOP_SET_ITM		(HYPERPRIVOP_START + 0xb)
+#define HYPERPRIVOP_THASH		(HYPERPRIVOP_START + 0xc)
+#define HYPERPRIVOP_PTC_GA		(HYPERPRIVOP_START + 0xd)
+#define HYPERPRIVOP_ITR_D		(HYPERPRIVOP_START + 0xe)
+#define HYPERPRIVOP_GET_RR		(HYPERPRIVOP_START + 0xf)
+#define HYPERPRIVOP_SET_RR		(HYPERPRIVOP_START + 0x10)
+#define HYPERPRIVOP_SET_KR		(HYPERPRIVOP_START + 0x11)
+#define HYPERPRIVOP_FC			(HYPERPRIVOP_START + 0x12)
+#define HYPERPRIVOP_GET_CPUID		(HYPERPRIVOP_START + 0x13)
+#define HYPERPRIVOP_GET_PMD		(HYPERPRIVOP_START + 0x14)
+#define HYPERPRIVOP_GET_EFLAG		(HYPERPRIVOP_START + 0x15)
+#define HYPERPRIVOP_SET_EFLAG		(HYPERPRIVOP_START + 0x16)
+#define HYPERPRIVOP_RSM_BE		(HYPERPRIVOP_START + 0x17)
+#define HYPERPRIVOP_GET_PSR		(HYPERPRIVOP_START + 0x18)
+#define HYPERPRIVOP_SET_RR0_TO_RR4	(HYPERPRIVOP_START + 0x19)
+#define HYPERPRIVOP_MAX			(0x1a)
+
+/* Fast and light hypercalls.  */
+#define __HYPERVISOR_ia64_fast_eoi	__HYPERVISOR_arch_1
+
+/* Xencomm macros.  */
+#define XENCOMM_INLINE_MASK		0xf800000000000000UL
+#define XENCOMM_INLINE_FLAG		0x8000000000000000UL
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Optimization features.
+ * The hypervisor may do some special optimizations for guests. This hypercall
+ * can be used to switch on/off these special optimizations.
+ */
+#define __HYPERVISOR_opt_feature	0x700UL
+
+#define XEN_IA64_OPTF_OFF		0x0
+#define XEN_IA64_OPTF_ON		0x1
+
+/*
+ * If this feature is switched on, the hypervisor inserts the
+ * tlb entries without calling the guests traphandler.
+ * This is useful in guests using region 7 for identity mapping
+ * like the linux kernel does.
+ */
+#define XEN_IA64_OPTF_IDENT_MAP_REG7	1
+
+/* Identity mapping of region 4 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG4	2
+
+/* Identity mapping of region 5 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG5	3
+
+#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET	 (0)
+
+struct xen_ia64_opt_feature {
+	unsigned long cmd;	/* Which feature */
+	unsigned char on;	/* Switch feature on/off */
+	union {
+		struct {
+			/* The page protection bit mask of the pte.
+			 * This will be or'ed with the pte. */
+			unsigned long pgprot;
+			unsigned long key;	/* A protection key for itir.*/
+		};
+	};
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_XEN_INTERFACE_H */
diff --git a/arch/ia64/include/asm/xen/irq.h b/arch/ia64/include/asm/xen/irq.h
new file mode 100644
index 00000000000..a9045098300
--- /dev/null
+++ b/arch/ia64/include/asm/xen/irq.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/irq.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_IRQ_H
+#define _ASM_IA64_XEN_IRQ_H
+
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define XEN_PIRQ_BASE		0
+#define XEN_NR_PIRQS		256
+
+#define XEN_DYNIRQ_BASE		(XEN_PIRQ_BASE + XEN_NR_PIRQS)
+#define XEN_NR_DYNIRQS		(NR_CPUS * 8)
+
+#define XEN_NR_IRQS		(XEN_NR_PIRQS + XEN_NR_DYNIRQS)
+
+#endif /* _ASM_IA64_XEN_IRQ_H */
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
new file mode 100644
index 00000000000..4d92d9bbda7
--- /dev/null
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -0,0 +1,134 @@
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *	psr.ic: off
+ *	r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *	psr.ic: off
+ *	 r2 = points to &pt_regs.r16
+ *	 r8 = contents of ar.ccv
+ *	 r9 = contents of ar.csd
+ *	r10 = contents of ar.ssd
+ *	r11 = FPSR_DEFAULT
+ *	r12 = kernel sp (kernel virtual address)
+ *	r13 = points to current task_struct (kernel virtual address)
+ *	p15 = TRUE if psr.i is set in cr.ipsr
+ *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *		preserved
+ * CONFIG_XEN note: p6/p7 are not preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define XEN_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)					\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
+	MOV_FROM_IIP(r28);		/* M */							\
+	mov r21=ar.fpsr;		/* M */							\
+	mov r26=ar.pfs;			/* I */							\
+	__COVER;			/* B;; (or nothing) */					\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	/* SAVE_IFS;*/ /* see xen special handling below */					\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	WORKAROUND;										\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+	movl r8=XSI_PRECOVER_IFS;								\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	/* xen special handling for possibly lazy cover */					\
+	/* SAVE_MIN case in dispatch_ia32_handler: mov r30=r0 */				\
+	ld8 r30=[r8];										\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	;;											\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	EXTRA;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	ACCOUNT_SYS_ENTER									\
+	BSW_1(r3,r14);	/* switch back to bank 1 (must be last in insn group) */		\
+	;;
diff --git a/arch/ia64/include/asm/xen/page.h b/arch/ia64/include/asm/xen/page.h
new file mode 100644
index 00000000000..03441a780b5
--- /dev/null
+++ b/arch/ia64/include/asm/xen/page.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/page.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_PAGE_H
+#define _ASM_IA64_XEN_PAGE_H
+
+#define INVALID_P2M_ENTRY	(~0UL)
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+	return mfn;
+}
+
+static inline unsigned long pfn_to_mfn(unsigned long pfn)
+{
+	return pfn;
+}
+
+#define phys_to_machine_mapping_valid(_x)	(1)
+
+static inline void *mfn_to_virt(unsigned long mfn)
+{
+	return __va(mfn << PAGE_SHIFT);
+}
+
+static inline unsigned long virt_to_mfn(void *virt)
+{
+	return __pa(virt) >> PAGE_SHIFT;
+}
+
+/* for tpmfront.c */
+static inline unsigned long virt_to_machine(void *virt)
+{
+	return __pa(virt);
+}
+
+static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	/* nothing */
+}
+
+#define pte_mfn(_x)	pte_pfn(_x)
+#define mfn_pte(_x, _y)	__pte_ma(0)		/* unmodified use */
+#define __pte_ma(_x)	((pte_t) {(_x)})        /* unmodified use */
+
+#endif /* _ASM_IA64_XEN_PAGE_H */
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
new file mode 100644
index 00000000000..71ec7546e10
--- /dev/null
+++ b/arch/ia64/include/asm/xen/privop.h
@@ -0,0 +1,129 @@
+#ifndef _ASM_IA64_XEN_PRIVOP_H
+#define _ASM_IA64_XEN_PRIVOP_H
+
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Paravirtualizations of privileged operations for Xen/ia64
+ *
+ *
+ * inline privop and paravirt_alt support
+ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>		/* arch-ia64.h requires uint64_t */
+#endif
+#include <asm/xen/interface.h>
+
+/* At 1 MB, before per-cpu space but still addressable using addl instead
+   of movl. */
+#define XSI_BASE			0xfffffffffff00000
+
+/* Address of mapped regs.  */
+#define XMAPPEDREGS_BASE		(XSI_BASE + XSI_SIZE)
+
+#ifdef __ASSEMBLY__
+#define XEN_HYPER_RFI			break HYPERPRIVOP_RFI
+#define XEN_HYPER_RSM_PSR_DT		break HYPERPRIVOP_RSM_DT
+#define XEN_HYPER_SSM_PSR_DT		break HYPERPRIVOP_SSM_DT
+#define XEN_HYPER_COVER			break HYPERPRIVOP_COVER
+#define XEN_HYPER_ITC_D			break HYPERPRIVOP_ITC_D
+#define XEN_HYPER_ITC_I			break HYPERPRIVOP_ITC_I
+#define XEN_HYPER_SSM_I			break HYPERPRIVOP_SSM_I
+#define XEN_HYPER_GET_IVR		break HYPERPRIVOP_GET_IVR
+#define XEN_HYPER_THASH			break HYPERPRIVOP_THASH
+#define XEN_HYPER_ITR_D			break HYPERPRIVOP_ITR_D
+#define XEN_HYPER_SET_KR		break HYPERPRIVOP_SET_KR
+#define XEN_HYPER_GET_PSR		break HYPERPRIVOP_GET_PSR
+#define XEN_HYPER_SET_RR0_TO_RR4	break HYPERPRIVOP_SET_RR0_TO_RR4
+
+#define XSI_IFS				(XSI_BASE + XSI_IFS_OFS)
+#define XSI_PRECOVER_IFS		(XSI_BASE + XSI_PRECOVER_IFS_OFS)
+#define XSI_IFA				(XSI_BASE + XSI_IFA_OFS)
+#define XSI_ISR				(XSI_BASE + XSI_ISR_OFS)
+#define XSI_IIM				(XSI_BASE + XSI_IIM_OFS)
+#define XSI_ITIR			(XSI_BASE + XSI_ITIR_OFS)
+#define XSI_PSR_I_ADDR			(XSI_BASE + XSI_PSR_I_ADDR_OFS)
+#define XSI_PSR_IC			(XSI_BASE + XSI_PSR_IC_OFS)
+#define XSI_IPSR			(XSI_BASE + XSI_IPSR_OFS)
+#define XSI_IIP				(XSI_BASE + XSI_IIP_OFS)
+#define XSI_B1NAT			(XSI_BASE + XSI_B1NATS_OFS)
+#define XSI_BANK1_R16			(XSI_BASE + XSI_BANK1_R16_OFS)
+#define XSI_BANKNUM			(XSI_BASE + XSI_BANKNUM_OFS)
+#define XSI_IHA				(XSI_BASE + XSI_IHA_OFS)
+#endif
+
+#ifndef __ASSEMBLY__
+
+/************************************************/
+/* Instructions paravirtualized for correctness */
+/************************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result. */
+extern void xen_fc(unsigned long addr);
+extern unsigned long xen_thash(unsigned long addr);
+
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code). */
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0. */
+extern unsigned long xen_get_cpuid(int index);
+extern unsigned long xen_get_pmd(int index);
+
+extern unsigned long xen_get_eflag(void);	/* see xen_ia64_getreg */
+extern void xen_set_eflag(unsigned long);	/* see xen_ia64_setreg */
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+
+/* Xen uses memory-mapped virtual privileged registers for access to many
+ * performance-sensitive privileged registers.  Some, like the processor
+ * status register (psr), are broken up into multiple memory locations.
+ * Others, like "pend", are abstractions based on privileged registers.
+ * "Pend" is guaranteed to be set if reading cr.ivr would return a
+ * (non-spurious) interrupt. */
+#define XEN_MAPPEDREGS ((struct mapped_regs *)XMAPPEDREGS_BASE)
+
+#define XSI_PSR_I			\
+	(*XEN_MAPPEDREGS->interrupt_mask_addr)
+#define xen_get_virtual_psr_i()		\
+	(!XSI_PSR_I)
+#define xen_set_virtual_psr_i(_val)	\
+	({ XSI_PSR_I = (uint8_t)(_val) ? 0 : 1; })
+#define xen_set_virtual_psr_ic(_val)	\
+	({ XEN_MAPPEDREGS->interrupt_collection_enabled = (_val) ? 1 : 0; })
+#define xen_get_virtual_pend()		\
+	(*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1))
+
+/* Although all privileged operations can be left to trap and will
+ * be properly handled by Xen, some are frequent enough that we use
+ * hyperprivops for performance. */
+extern unsigned long xen_get_psr(void);
+extern unsigned long xen_get_ivr(void);
+extern unsigned long xen_get_tpr(void);
+extern void xen_hyper_ssm_i(void);
+extern void xen_set_itm(unsigned long);
+extern void xen_set_tpr(unsigned long);
+extern void xen_eoi(unsigned long);
+extern unsigned long xen_get_rr(unsigned long index);
+extern void xen_set_rr(unsigned long index, unsigned long val);
+extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+			       unsigned long val2, unsigned long val3,
+			       unsigned long val4);
+extern void xen_set_kr(unsigned long index, unsigned long val);
+extern void xen_ptcga(unsigned long addr, unsigned long size);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_XEN_PRIVOP_H */
diff --git a/arch/ia64/include/asm/xen/xcom_hcall.h b/arch/ia64/include/asm/xen/xcom_hcall.h
new file mode 100644
index 00000000000..20b2950c71b
--- /dev/null
+++ b/arch/ia64/include/asm/xen/xcom_hcall.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2006 Tristan Gingold <tristan.gingold@bull.net>, Bull SAS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _ASM_IA64_XEN_XCOM_HCALL_H
+#define _ASM_IA64_XEN_XCOM_HCALL_H
+
+/* These functions create an inline or mini descriptor for the parameters and
+   call the corresponding xencomm_arch_hypercall_X.
+   Architectures should define HYPERVISOR_xxx as xencomm_hypercall_xxx unless
+   they want to use their own wrapper.  */
+extern int xencomm_hypercall_console_io(int cmd, int count, char *str);
+
+extern int xencomm_hypercall_event_channel_op(int cmd, void *op);
+
+extern int xencomm_hypercall_xen_version(int cmd, void *arg);
+
+extern int xencomm_hypercall_physdev_op(int cmd, void *op);
+
+extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
+					    unsigned int count);
+
+extern int xencomm_hypercall_sched_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_multicall(void *call_list, int nr_calls);
+
+extern int xencomm_hypercall_callback_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg);
+
+extern int xencomm_hypercall_suspend(unsigned long srec);
+
+extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg);
+
+extern long xencomm_hypercall_opt_feature(void *arg);
+
+#endif /* _ASM_IA64_XEN_XCOM_HCALL_H */
diff --git a/arch/ia64/include/asm/xen/xencomm.h b/arch/ia64/include/asm/xen/xencomm.h
new file mode 100644
index 00000000000..cded677bebf
--- /dev/null
+++ b/arch/ia64/include/asm/xen/xencomm.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _ASM_IA64_XEN_XENCOMM_H
+#define _ASM_IA64_XEN_XENCOMM_H
+
+#include <xen/xencomm.h>
+#include <asm/pgtable.h>
+
+/* Must be called before any hypercall.  */
+extern void xencomm_initialize(void);
+extern int xencomm_is_initialized(void);
+
+/* Check whether virtual contiguity implies physical contiguity for the
+ * given virtual address.
+ * On ia64 this holds for the identity-mapped area in region 7 and for the
+ * piece of region 5 mapped by itr[IA64_TR_KERNEL]/dtr[IA64_TR_KERNEL].
+ */
+static inline int xencomm_is_phys_contiguous(unsigned long addr)
+{
+	return (PAGE_OFFSET <= addr &&
+		addr < (PAGE_OFFSET + (1UL << IA64_MAX_PHYS_BITS))) ||
+		(KERNEL_START <= addr &&
+		 addr < KERNEL_START + KERNEL_TR_PAGE_SIZE);
+}
+
+#endif /* _ASM_IA64_XEN_XENCOMM_H */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 87fea11aecb..c381ea95489 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
 endif
+obj-$(CONFIG_DMAR)		+= pci-dma.o
+ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
+endif
 
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o
@@ -112,5 +116,23 @@ clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
 ASM_PARAVIRT_OBJS = ivt.o entry.o
 define paravirtualized_native
 AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
+extra-y += pvchk-$(1)
 endef
 $(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
+
+#
+# Checker for paravirtualizations of privileged operations.
+#
+quiet_cmd_pv_check_sed = PVCHK   $@
+define cmd_pv_check_sed
+	sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
+endef
+
+$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
+	$(call if_changed_dep,as_s_S)
+$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
+	$(call if_changed,pv_check_sed)
+$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
+	$(call if_changed,as_o_S)
+.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5d1eb7ee2bf..0635015d0aa 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -52,6 +52,7 @@
 #include <asm/numa.h>
 #include <asm/sal.h>
 #include <asm/cyclone.h>
+#include <asm/xen/hypervisor.h>
 
 #define BAD_MADT_ENTRY(entry, end) (                                        \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
@@ -91,6 +92,9 @@ acpi_get_sysname(void)
 	struct acpi_table_rsdp *rsdp;
 	struct acpi_table_xsdt *xsdt;
 	struct acpi_table_header *hdr;
+#ifdef CONFIG_DMAR
+	u64 i, nentries;
+#endif
 
 	rsdp_phys = acpi_find_rsdp();
 	if (!rsdp_phys) {
@@ -121,7 +125,21 @@ acpi_get_sysname(void)
 			return "uv";
 		else
 			return "sn2";
+	} else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) {
+		return "xen";
+	}
+
+#ifdef CONFIG_DMAR
+	/* Look for Intel IOMMU */
+	nentries = (hdr->length - sizeof(*hdr)) /
+			 sizeof(xsdt->table_offset_entry[0]);
+	for (i = 0; i < nentries; i++) {
+		hdr = __va(xsdt->table_offset_entry[i]);
+		if (strncmp(hdr->signature, ACPI_SIG_DMAR,
+			sizeof(ACPI_SIG_DMAR) - 1) == 0)
+			return "dig_vtd";
 	}
+#endif
 
 	return "dig";
 #else
@@ -137,6 +155,10 @@ acpi_get_sysname(void)
 	return "uv";
 # elif defined (CONFIG_IA64_DIG)
 	return "dig";
+# elif defined (CONFIG_IA64_XEN_GUEST)
+	return "xen";
+# elif defined(CONFIG_IA64_DIG_VTD)
+	return "dig_vtd";
 # else
 #	error Unknown platform.  Fix acpi.c.
 # endif
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 94c44b1ccfd..742dbb1d5a4 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -16,6 +16,9 @@
 #include <asm/sigcontext.h>
 #include <asm/mca.h>
 
+#include <asm/xen/interface.h>
+#include <asm/xen/hypervisor.h>
+
 #include "../kernel/sigframe.h"
 #include "../kernel/fsyscall_gtod_data.h"
 
@@ -286,4 +289,32 @@ void foo(void)
 		offsetof (struct itc_jitter_data_t, itc_jitter));
 	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
 		offsetof (struct itc_jitter_data_t, itc_lastcycle));
+
+#ifdef CONFIG_XEN
+	BLANK();
+
+	DEFINE(XEN_NATIVE_ASM, XEN_NATIVE);
+	DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN);
+
+#define DEFINE_MAPPED_REG_OFS(sym, field) \
+	DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field)))
+
+	DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
+	DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
+	DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
+	DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
+	DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
+	DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
+	DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
+	DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
+	DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
+	DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
+	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
+	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
+	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
+	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
+	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
+	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
+	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
+#endif /* CONFIG_XEN */
 }
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index da60e90eeeb..23e91290e41 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -8,10 +8,14 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/crash_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 51b75cea701..efaff15d8cf 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
 }
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
 /* locate the size find a the descriptor at a certain address */
 unsigned long __init
 vmcore_find_descriptor_size (unsigned long address)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 0dd6c1419d8..7ef0c594f5e 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -534,6 +534,11 @@ GLOBAL_ENTRY(ia64_trace_syscall)
  	stf.spill [r16]=f10
  	stf.spill [r17]=f11
 	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+	cmp.lt p6,p0=r8,r0			// check tracehook
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
 	adds r16=PT(F6)+16,sp
 	adds r17=PT(F7)+16,sp
 	;;
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 416a952b19b..f675d8e3385 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -580,7 +580,7 @@ ENTRY(dirty_bit)
 	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
 #endif
 	mov pr=r31,-1				// restore pr
 	RFI
@@ -646,7 +646,7 @@ ENTRY(iaccess_bit)
 	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.i r18				// install updated PTE
+	ITC_I(p0, r18, r16)			// install updated PTE
 #endif /* !CONFIG_SMP */
 	mov pr=r31,-1
 	RFI
@@ -698,7 +698,7 @@ ENTRY(daccess_bit)
 	or r18=_PAGE_A,r18			// set the accessed bit
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
 #endif
 	mov b0=r29				// restore b0
 	mov pr=r31,-1
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 60c6ef67ebb..702a09c1323 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
+#include <linux/dmar.h>
 #include <asm/smp.h>
 
 /*
@@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 	return ia64_teardown_msi_irq(irq);
 }
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	int cpu = first_cpu(mask);
+
+
+	if (!cpu_online(cpu))
+		return;
+
+	if (irq_prepare_move(irq, cpu))
+		return;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
+	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+
+	dmar_msi_write(irq, &msg);
+	irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned dest;
+	cpumask_t mask;
+
+	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	dest = cpu_physical_id(first_cpu(mask));
+
+	msg->address_hi = 0;
+	msg->address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DESTID_CPU(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(cfg->vector);
+	return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
index 8273afc32db..ee564575148 100644
--- a/arch/ia64/kernel/nr-irqs.c
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -10,6 +10,7 @@
 #include <linux/kbuild.h>
 #include <linux/threads.h>
 #include <asm/native/irq.h>
+#include <asm/xen/irq.h>
 
 void foo(void)
 {
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index afaf5b9a2cf..de35d8e8b7d 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -332,7 +332,7 @@ ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
 
 struct pv_iosapic_ops pv_iosapic_ops = {
 	.pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
-	.get_irq_chip = ia64_native_iosapic_get_irq_chip,
+	.__get_irq_chip = ia64_native_iosapic_get_irq_chip,
 
 	.__read = ia64_native_iosapic_read,
 	.__write = ia64_native_iosapic_write,
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
index 5cad6fb2ed1..64d6d810c64 100644
--- a/arch/ia64/kernel/paravirt_inst.h
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -20,7 +20,9 @@
  *
  */
 
-#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN
+#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
+#include <asm/native/pvchk_inst.h>
+#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN)
 #include <asm/xen/inst.h>
 #include <asm/xen/minstate.h>
 #else
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 00000000000..10a75b55765
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,129 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_DMAR
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/iommu.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+/* Dummy device used for NULL arguments (normally ISA). Better would
+   be probably a smaller DMA mask, but this is bug-to-bug compatible
+   to i386. */
+struct device fallback_dev = {
+	.bus_id = "fallback device",
+	.coherent_dma_mask = DMA_32BIT_MASK,
+	.dma_mask = &fallback_dev.coherent_dma_mask,
+};
+
+void __init pci_iommu_alloc(void)
+{
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+	if (iommu_detected)
+		intel_iommu_init();
+
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+	return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+	return;
+}
+
+struct dma_mapping_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+#ifdef CONFIG_PCI
+	if (mask > 0xffffffff && forbid_dac > 0) {
+		dev_info(dev, "Disallowing DAC for device\n");
+		return 0;
+	}
+#endif
+
+	if (ops->dma_supported_op)
+		return ops->dma_supported_op(dev, mask);
+
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_24BIT_MASK)
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+		dev_info(dev, "Force SAC with mask %lx\n", mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+#endif
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
new file mode 100644
index 00000000000..16c50516dbc
--- /dev/null
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -0,0 +1,46 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+struct dma_mapping_ops swiotlb_dma_ops = {
+	.mapping_error = swiotlb_dma_mapping_error,
+	.alloc_coherent = swiotlb_alloc_coherent,
+	.free_coherent = swiotlb_free_coherent,
+	.map_single = swiotlb_map_single,
+	.unmap_single = swiotlb_unmap_single,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
+	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.map_sg = swiotlb_map_sg,
+	.unmap_sg = swiotlb_unmap_sg,
+	.dma_supported_op = swiotlb_dma_supported,
+};
+
+void __init pci_swiotlb_init(void)
+{
+	if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+		swiotlb = 1;
+		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+		machvec_init("dig");
+		swiotlb_init();
+		dma_ops = &swiotlb_dma_ops;
+#else
+		panic("Unable to find Intel IOMMU");
+#endif
+	}
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index fc8f3509df2..ada4605d122 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -40,6 +40,7 @@
 #include <linux/capability.h>
 #include <linux/rcupdate.h>
 #include <linux/completion.h>
+#include <linux/tracehook.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -3684,7 +3685,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 		PFM_SET_WORK_PENDING(task, 1);
 
-		tsk_set_notify_resume(task);
+		set_notify_resume(task);
 
 		/*
 		 * XXX: send reschedule if task runs on another CPU
@@ -5044,8 +5045,6 @@ pfm_handle_work(void)
 
 	PFM_SET_WORK_PENDING(current, 0);
 
-	tsk_clear_notify_resume(current);
-
 	regs = task_pt_regs(current);
 
 	/*
@@ -5414,7 +5413,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 			 * when coming from ctxsw, current still points to the
 			 * previous task, therefore we must work with task and not current.
 			 */
-			tsk_set_notify_resume(task);
+			set_notify_resume(task);
 		}
 		/*
 		 * defer until state is changed (shorten spin window). the context is locked
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 3ab8373103e..c5716270514 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -28,6 +28,7 @@
 #include <linux/delay.h>
 #include <linux/kdebug.h>
 #include <linux/utsname.h>
+#include <linux/tracehook.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -160,21 +161,6 @@ show_regs (struct pt_regs *regs)
 		show_stack(NULL, NULL);
 }
 
-void tsk_clear_notify_resume(struct task_struct *tsk)
-{
-#ifdef CONFIG_PERFMON
-	if (tsk->thread.pfm_needs_checking)
-		return;
-#endif
-	if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
-		return;
-	clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
-}
-
-/*
- * do_notify_resume_user():
- *	Called from notify_resume_user at entry.S, with interrupts disabled.
- */
 void
 do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
@@ -203,6 +189,11 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
 		ia64_do_signal(scr, in_syscall);
 	}
 
+	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(&scr->pt);
+	}
+
 	/* copy user rbs to kernel rbs */
 	if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
 		local_irq_enable();	/* force interrupt enable */
@@ -251,7 +242,6 @@ default_idle (void)
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
-	extern void ia64_cpu_local_tick (void);
 	unsigned int this_cpu = smp_processor_id();
 
 	/* Ack it */
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2a9943b5947..92c9689b7d9 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/signal.h>
 #include <linux/regset.h>
 #include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -603,7 +604,7 @@ void ia64_ptrace_stop(void)
 {
 	if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
 		return;
-	tsk_set_notify_resume(current);
+	set_notify_resume(current);
 	unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
 }
 
@@ -613,7 +614,6 @@ void ia64_ptrace_stop(void)
 void ia64_sync_krbs(void)
 {
 	clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
-	tsk_clear_notify_resume(current);
 
 	unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
 }
@@ -644,7 +644,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
 		spin_lock_irq(&child->sighand->siglock);
 		if (child->state == TASK_STOPPED &&
 		    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
-			tsk_set_notify_resume(child);
+			set_notify_resume(child);
 
 			child->state = TASK_TRACED;
 			stopped = 1;
@@ -1232,37 +1232,16 @@ arch_ptrace (struct task_struct *child, long request, long addr, long data)
 }
 
 
-static void
-syscall_trace (void)
-{
-	/*
-	 * The 0x80 provides a way for the tracing parent to
-	 * distinguish between a syscall stop and SIGTRAP delivery.
-	 */
-	ptrace_notify(SIGTRAP
-		      | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
-
-	/*
-	 * This isn't the same as continuing with a signal, but it
-	 * will do for normal use.  strace only continues with a
-	 * signal if the stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
-
 /* "asmlinkage" so the input arguments are preserved... */
 
-asmlinkage void
+asmlinkage long
 syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 		     long arg4, long arg5, long arg6, long arg7,
 		     struct pt_regs regs)
 {
-	if (test_thread_flag(TIF_SYSCALL_TRACE) 
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace();
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		if (tracehook_report_syscall_entry(&regs))
+			return -ENOSYS;
 
 	/* copy user rbs to kernel rbs */
 	if (test_thread_flag(TIF_RESTORE_RSE))
@@ -1283,6 +1262,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
 	}
 
+	return 0;
 }
 
 /* "asmlinkage" so the input arguments are preserved... */
@@ -1292,6 +1272,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 		     long arg4, long arg5, long arg6, long arg7,
 		     struct pt_regs regs)
 {
+	int step;
+
 	if (unlikely(current->audit_context)) {
 		int success = AUDITSC_RESULT(regs.r10);
 		long result = regs.r8;
@@ -1301,10 +1283,9 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 		audit_syscall_exit(success, result);
 	}
 
-	if ((test_thread_flag(TIF_SYSCALL_TRACE)
-	    || test_thread_flag(TIF_SINGLESTEP))
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace();
+	step = test_thread_flag(TIF_SINGLESTEP);
+	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(&regs, step);
 
 	/* copy user rbs to kernel rbs */
 	if (test_thread_flag(TIF_RESTORE_RSE))
@@ -1940,7 +1921,7 @@ gpregs_writeback(struct task_struct *target,
 {
 	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
 		return 0;
-	tsk_set_notify_resume(target);
+	set_notify_resume(target);
 	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
 		NULL, NULL);
 }
@@ -2199,3 +2180,68 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
 #endif
 	return &user_ia64_view;
 }
+
+struct syscall_get_set_args {
+	unsigned int i;
+	unsigned int n;
+	unsigned long *args;
+	struct pt_regs *regs;
+	int rw;
+};
+
+static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
+{
+	struct syscall_get_set_args *args = data;
+	struct pt_regs *pt = args->regs;
+	unsigned long *krbs, cfm, ndirty;
+	int i, count;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	cfm = pt->cr_ifs;
+	krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
+	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+	count = 0;
+	if (in_syscall(pt))
+		count = min_t(int, args->n, cfm & 0x7f);
+
+	for (i = 0; i < count; i++) {
+		if (args->rw)
+			*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
+				args->args[i];
+		else
+			args->args[i] = *ia64_rse_skip_regs(krbs,
+				ndirty + i + args->i);
+	}
+
+	if (!args->rw) {
+		while (i < args->n) {
+			args->args[i] = 0;
+			i++;
+		}
+	}
+}
+
+void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw)
+{
+	struct syscall_get_set_args data = {
+		.i = i,
+		.n = n,
+		.args = args,
+		.regs = regs,
+		.rw = rw,
+	};
+
+	if (task == current)
+		unw_init_running(syscall_get_set_args_cb, &data);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, task);
+		syscall_get_set_args_cb(&ufi, &data);
+	}
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index de636b21567..ae7911702bf 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -116,6 +116,13 @@ unsigned int num_io_spaces;
  */
 #define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
 unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define	CACHE_STRIDE_SHIFT	5
+unsigned long ia64_cache_stride_shift = ~0;
 
 /*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
@@ -352,7 +359,7 @@ reserve_memory (void)
 	}
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
 	if (reserve_elfcorehdr(&rsvd_region[n].start,
 			       &rsvd_region[n].end) == 0)
 		n++;
@@ -478,7 +485,12 @@ static __init int setup_nomca(char *s)
 }
 early_param("nomca", setup_nomca);
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
  */
@@ -502,11 +514,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
 	 * to work properly.
 	 */
 
-	if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+	if (!is_vmcore_usable())
 		return -EINVAL;
 
 	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
-		elfcorehdr_addr = ELFCORE_ADDR_MAX;
+		vmcore_unusable();
 		return -EINVAL;
 	}
 
@@ -847,13 +859,14 @@ setup_per_cpu_areas (void)
 }
 
 /*
- * Calculate the max. cache line size.
+ * Do the following calculations:
  *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
  */
 static void __cpuinit
-get_max_cacheline_size (void)
+get_cache_info(void)
 {
 	unsigned long line_size, max = 1;
 	u64 l, levels, unique_caches;
@@ -867,12 +880,14 @@ get_max_cacheline_size (void)
                 max = SMP_CACHE_BYTES;
 		/* Safest setup for "flush_icache_range()" */
 		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+		/* Safest setup for "clflush_cache_range()" */
+		ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 		goto out;
         }
 
 	for (l = 0; l < levels; ++l) {
-		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
-						    &cci);
+		/* cache_type (data_or_unified)=2 */
+		status = ia64_pal_cache_config_info(l, 2, &cci);
 		if (status != 0) {
 			printk(KERN_ERR
 			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
@@ -880,15 +895,21 @@ get_max_cacheline_size (void)
 			max = SMP_CACHE_BYTES;
 			/* The safest setup for "flush_icache_range()" */
 			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			/* The safest setup for "clflush_cache_range()" */
+			ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 			cci.pcci_unified = 1;
+		} else {
+			if (cci.pcci_stride < ia64_cache_stride_shift)
+				ia64_cache_stride_shift = cci.pcci_stride;
+
+			line_size = 1 << cci.pcci_line_size;
+			if (line_size > max)
+				max = line_size;
 		}
-		line_size = 1 << cci.pcci_line_size;
-		if (line_size > max)
-			max = line_size;
+
 		if (!cci.pcci_unified) {
-			status = ia64_pal_cache_config_info(l,
-						    /* cache_type (instruction)= */ 1,
-						    &cci);
+			/* cache_type (instruction)=1*/
+			status = ia64_pal_cache_config_info(l, 1, &cci);
 			if (status != 0) {
 				printk(KERN_ERR
 				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
@@ -942,7 +963,7 @@ cpu_init (void)
 	}
 #endif
 
-	get_max_cacheline_size();
+	get_cache_info();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 19c5a78636f..e12500a9c44 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
@@ -439,6 +440,13 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
 		sigaddset(&current->blocked, sig);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
+
+	/*
+	 * Let tracing know that we've done the handler setup.
+	 */
+	tracehook_signal_handler(sig, info, ka, &scr->pt,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 1;
 }
 
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index 2a0d27f2f21..1d8c8886006 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -60,3 +60,58 @@ GLOBAL_ENTRY(flush_icache_range)
 	mov	ar.lc=r3		// restore ar.lc
 	br.ret.sptk.many rp
 END(flush_icache_range)
+
+	/*
+	 * clflush_cache_range(start,size)
+	 *
+	 *	Flush cache lines from start to start+size-1.
+	 *
+	 *	Must deal with range from start to start+size-1 but nothing else
+	 *	(need to be careful not to touch addresses that may be
+	 *	unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
+	 */
+	.section .kprobes.text,"ax"
+GLOBAL_ENTRY(clflush_cache_range)
+
+	.prologue
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_cache_stride_shift
+	mov	r21=1
+	add     r22=in1,in0
+	;;
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=r22,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc" =
+					//	"start" rounded down to stride
+					//	boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
+	;;
+
+	.body
+	mov	ar.lc=r8
+	;;
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop_fc:
+	fc	r24		// issuable on M0 only
+	add	r24=r21,r24	// we flush "stride size" bytes per iteration
+	nop.i	0
+	br.cloop.sptk.few .Loop_fc
+	;;
+	sync.i
+	;;
+	srlz.i
+	;;
+	mov	ar.lc=r3		// restore ar.lc
+	br.ret.sptk.many rp
+END(clflush_cache_range)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index f482a9098e3..054bcd9439a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return ret;
 }
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-	unsigned long timeout = 120 * HZ;
-	int ret;
-	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = start_pfn + (size >> PAGE_SHIFT);
-	ret = offline_pages(start_pfn, end_pfn, timeout);
-	if (ret)
-		goto out;
-	/* we can free mem_map at this point */
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif
 
 /*
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 8caf42471f0..bd9818a36b4 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -362,9 +362,13 @@ ia64_tlb_init (void)
 		per_cpu(ia64_tr_num, cpu) =
 				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
 	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		static int justonce = 1;
 		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
-		printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
-			"IA64_TR_ALLOC_MAX should be extended\n");
+		if (justonce) {
+			justonce = 0;
+			printk(KERN_DEBUG "TR register number exceeds "
+			       "IA64_TR_ALLOC_MAX!\n");
+		}
 	}
 }
 
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a862..211fcfd115f 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 	pgprot_t prot;
 	char *addr;
 
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
 	/*
 	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
 	 * for more details.
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
new file mode 100644
index 00000000000..ba66ac2e4c6
--- /dev/null
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -0,0 +1,32 @@
+#
+# Checker for paravirtualizations of privileged operations.
+#
+s/ssm.*psr\.ic.*/.warning \"ssm psr.ic should not be used directly\"/g
+s/rsm.*psr\.ic.*/.warning \"rsm psr.ic should not be used directly\"/g
+s/ssm.*psr\.i.*/.warning \"ssm psr.i should not be used directly\"/g
+s/rsm.*psr\.i.*/.warning \"rsm psr.i should not be used directly\"/g
+s/ssm.*psr\.dt.*/.warning \"ssm psr.dt should not be used directly\"/g
+s/rsm.*psr\.dt.*/.warning \"rsm psr.dt should not be used directly\"/g
+s/mov.*=.*cr\.ifa/.warning \"cr.ifa should not be used directly\"/g
+s/mov.*=.*cr\.itir/.warning \"cr.itir should not be used directly\"/g
+s/mov.*=.*cr\.isr/.warning \"cr.isr should not be used directly\"/g
+s/mov.*=.*cr\.iha/.warning \"cr.iha should not be used directly\"/g
+s/mov.*=.*cr\.ipsr/.warning \"cr.ipsr should not be used directly\"/g
+s/mov.*=.*cr\.iim/.warning \"cr.iim should not be used directly\"/g
+s/mov.*=.*cr\.iip/.warning \"cr.iip should not be used directly\"/g
+s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not be used directly\"/g
+s/mov.*=[^\.]*psr/.warning \"psr should not be used directly\"/g	# avoid ar.fpsr
+s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not be used directly\"/g
+s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not be used directly\"/g
+s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not be used directly\"/g
+s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not be used directly\"/g
+s/mov.*cr\.ipsr.*=.*/.warning \"cr.ipsr should not be used directly\"/g
+s/mov.*cr\.ifs.*=.*/.warning \"cr.ifs should not be used directly\"/g
+s/mov.*cr\.iip.*=.*/.warning \"cr.iip should not be used directly\"/g
+s/mov.*cr\.kr.*=.*/.warning \"cr.kr should not be used directly\"/g
+s/mov.*ar\.eflags.*=.*/.warning \"ar.eflags should not be used directly\"/g
+s/itc\.i.*/.warning \"itc.i should not be used directly.\"/g
+s/itc\.d.*/.warning \"itc.d should not be used directly.\"/g
+s/bsw\.0/.warning \"bsw.0 should not be used directly.\"/g
+s/bsw\.1/.warning \"bsw.1 should not be used directly.\"/g
+s/ptc\.ga.*/.warning \"ptc.ga should not be used directly.\"/g
diff --git a/arch/ia64/xen/Kconfig b/arch/ia64/xen/Kconfig
new file mode 100644
index 00000000000..f1683a20275
--- /dev/null
+++ b/arch/ia64/xen/Kconfig
@@ -0,0 +1,26 @@
+#
+# This Kconfig describes xen/ia64 options
+#
+
+config XEN
+	bool "Xen hypervisor support"
+	default y
+	depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL
+	select XEN_XENCOMM
+	select NO_IDLE_HZ
+
+	# those are required to save/restore.
+	select ARCH_SUSPEND_POSSIBLE
+	select SUSPEND
+	select PM_SLEEP
+	help
+	  Enable Xen hypervisor support.  Resulting kernel runs
+	  both as a guest OS on Xen and natively on hardware.
+
+config XEN_XENCOMM
+	depends on XEN
+	bool
+
+config NO_IDLE_HZ
+	depends on XEN
+	bool
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
new file mode 100644
index 00000000000..0ad0224693d
--- /dev/null
+++ b/arch/ia64/xen/Makefile
@@ -0,0 +1,22 @@
+#
+# Makefile for Xen components
+#
+
+obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \
+	 hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o
+
+obj-$(CONFIG_IA64_GENERIC) += machvec.o
+
+AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
+
+# xen multi compile
+ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S
+ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
+obj-y += $(ASM_PARAVIRT_OBJS)
+define paravirtualized_xen
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_XEN
+endef
+$(foreach o,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_xen,$(o))))
+
+$(obj)/xen-%.o: $(src)/../kernel/%.S FORCE
+	$(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/xen/grant-table.c b/arch/ia64/xen/grant-table.c
new file mode 100644
index 00000000000..777dd9a9108
--- /dev/null
+++ b/arch/ia64/xen/grant-table.c
@@ -0,0 +1,155 @@
+/******************************************************************************
+ * arch/ia64/xen/grant-table.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/grant_table.h>
+
+#include <asm/xen/hypervisor.h>
+
+struct vm_struct *xen_alloc_vm_area(unsigned long size)
+{
+	int order;
+	unsigned long virt;
+	unsigned long nr_pages;
+	struct vm_struct *area;
+
+	order = get_order(size);
+	virt = __get_free_pages(GFP_KERNEL, order);
+	if (virt == 0)
+		goto err0;
+	nr_pages = 1 << order;
+	scrub_pages(virt, nr_pages);
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (area == NULL)
+		goto err1;
+
+	area->flags = VM_IOREMAP;
+	area->addr = (void *)virt;
+	area->size = size;
+	area->pages = NULL;
+	area->nr_pages = nr_pages;
+	area->phys_addr = 0;	/* xenbus_map_ring_valloc uses this field!  */
+
+	return area;
+
+err1:
+	free_pages(virt, order);
+err0:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xen_alloc_vm_area);
+
+void xen_free_vm_area(struct vm_struct *area)
+{
+	unsigned int order = get_order(area->size);
+	unsigned long i;
+	unsigned long phys_addr = __pa(area->addr);
+
+	/* This area is used for foreign page mapping.
+	 * So underlying machine page may not be assigned. */
+	for (i = 0; i < (1 << order); i++) {
+		unsigned long ret;
+		unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i;
+		struct xen_memory_reservation reservation = {
+			.nr_extents   = 1,
+			.address_bits = 0,
+			.extent_order = 0,
+			.domid        = DOMID_SELF
+		};
+		set_xen_guest_handle(reservation.extent_start, &gpfn);
+		ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
+					   &reservation);
+		BUG_ON(ret != 1);
+	}
+	free_pages((unsigned long)area->addr, order);
+	kfree(area);
+}
+EXPORT_SYMBOL_GPL(xen_free_vm_area);
+
+
+/****************************************************************************
+ * grant table hack
+ * cmd: GNTTABOP_xxx
+ */
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   struct grant_entry **__shared)
+{
+	*__shared = __va(frames[0] << PAGE_SHIFT);
+	return 0;
+}
+
+void arch_gnttab_unmap_shared(struct grant_entry *shared,
+			      unsigned long nr_gframes)
+{
+	/* nothing */
+}
+
+static void
+gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
+{
+	uint32_t flags;
+
+	flags = uop->flags;
+
+	if (flags & GNTMAP_host_map) {
+		if (flags & GNTMAP_application_map) {
+			printk(KERN_DEBUG
+			       "GNTMAP_application_map is not supported yet: "
+			       "flags 0x%x\n", flags);
+			BUG();
+		}
+		if (flags & GNTMAP_contains_pte) {
+			printk(KERN_DEBUG
+			       "GNTMAP_contains_pte is not supported yet: "
+			       "flags 0x%x\n", flags);
+			BUG();
+		}
+	} else if (flags & GNTMAP_device_map) {
+		printk("GNTMAP_device_map is not supported yet 0x%x\n", flags);
+		BUG();	/* not yet. actually this flag is not used. */
+	} else {
+		BUG();
+	}
+}
+
+int
+HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
+{
+	if (cmd == GNTTABOP_map_grant_ref) {
+		unsigned int i;
+		for (i = 0; i < count; i++) {
+			gnttab_map_grant_ref_pre(
+				(struct gnttab_map_grant_ref *)uop + i);
+		}
+	}
+	return xencomm_hypercall_grant_table_op(cmd, uop, count);
+}
+
+EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S
new file mode 100644
index 00000000000..d4ff0b9e79f
--- /dev/null
+++ b/arch/ia64/xen/hypercall.S
@@ -0,0 +1,91 @@
+/*
+ * Support routines for Xen hypercalls
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ * Copyright (C) 2008 Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/intrinsics.h>
+#include <asm/xen/privop.h>
+
+/*
+ * Hypercalls without parameter.
+ */
+#define __HCALL0(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+/*
+ * Hypercalls with 1 parameter.
+ */
+#define __HCALL1(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	mov r8=r32;			\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+/*
+ * Hypercalls with 2 parameters.
+ */
+#define __HCALL2(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	mov r8=r32;			\
+	mov r9=r33;			\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+__HCALL0(xen_get_psr, HYPERPRIVOP_GET_PSR)
+__HCALL0(xen_get_ivr, HYPERPRIVOP_GET_IVR)
+__HCALL0(xen_get_tpr, HYPERPRIVOP_GET_TPR)
+__HCALL0(xen_hyper_ssm_i, HYPERPRIVOP_SSM_I)
+
+__HCALL1(xen_set_tpr, HYPERPRIVOP_SET_TPR)
+__HCALL1(xen_eoi, HYPERPRIVOP_EOI)
+__HCALL1(xen_thash, HYPERPRIVOP_THASH)
+__HCALL1(xen_set_itm, HYPERPRIVOP_SET_ITM)
+__HCALL1(xen_get_rr, HYPERPRIVOP_GET_RR)
+__HCALL1(xen_fc, HYPERPRIVOP_FC)
+__HCALL1(xen_get_cpuid, HYPERPRIVOP_GET_CPUID)
+__HCALL1(xen_get_pmd, HYPERPRIVOP_GET_PMD)
+
+__HCALL2(xen_ptcga, HYPERPRIVOP_PTC_GA)
+__HCALL2(xen_set_rr, HYPERPRIVOP_SET_RR)
+__HCALL2(xen_set_kr, HYPERPRIVOP_SET_KR)
+
+#ifdef CONFIG_IA32_SUPPORT
+__HCALL1(xen_get_eflag, HYPERPRIVOP_GET_EFLAG)
+__HCALL1(xen_set_eflag, HYPERPRIVOP_SET_EFLAG)	// refer SDM vol1 3.1.8
+#endif /* CONFIG_IA32_SUPPORT */
+
+GLOBAL_ENTRY(xen_set_rr0_to_rr4)
+	mov r8=r32
+	mov r9=r33
+	mov r10=r34
+	mov r11=r35
+	mov r14=r36
+	XEN_HYPER_SET_RR0_TO_RR4
+	br.ret.sptk.many rp
+	;;
+END(xen_set_rr0_to_rr4)
+
+GLOBAL_ENTRY(xen_send_ipi)
+	mov r14=r32
+	mov r15=r33
+	mov r2=0x400
+	break 0x1000
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_send_ipi)
+
+GLOBAL_ENTRY(__hypercall)
+	mov r2=r37
+	break 0x1000
+	br.ret.sptk.many b0
+	;;
+END(__hypercall)
diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c
new file mode 100644
index 00000000000..cac4d97c0b5
--- /dev/null
+++ b/arch/ia64/xen/hypervisor.c
@@ -0,0 +1,96 @@
+/******************************************************************************
+ * arch/ia64/xen/hypervisor.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+
+struct shared_info *HYPERVISOR_shared_info __read_mostly =
+	(struct shared_info *)XSI_BASE;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+struct start_info *xen_start_info;
+EXPORT_SYMBOL(xen_start_info);
+
+EXPORT_SYMBOL(xen_domain_type);
+
+EXPORT_SYMBOL(__hypercall);
+
+/* Stolen from arch/x86/xen/enlighten.c */
+/*
+ * Flag to determine whether vcpu info placement is available on all
+ * VCPUs.  We assume it is to start with, and then set it to zero on
+ * the first failure.  This is because it can succeed on some VCPUs
+ * and not others, since it can involve hypervisor memory allocation,
+ * or because the guest failed to guarantee all the appropriate
+ * constraints on all VCPUs (ie buffer can't cross a page boundary).
+ *
+ * Note that any particular CPU may be using a placed vcpu structure,
+ * but we can only optimise if they all are.
+ *
+ * 0: not available, 1: available
+ */
+
+static void __init xen_vcpu_setup(int cpu)
+{
+	/*
+	 * WARNING:
+	 * before changing MAX_VIRT_CPUS,
+	 * check that shared_info fits on a page
+	 */
+	BUILD_BUG_ON(sizeof(struct shared_info) > PAGE_SIZE);
+	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+}
+
+void __init xen_setup_vcpu_info_placement(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		xen_vcpu_setup(cpu);
+}
+
+void __cpuinit
+xen_cpu_init(void)
+{
+	xen_smp_intr_init();
+}
+
+/**************************************************************************
+ * opt feature
+ */
+void
+xen_ia64_enable_opt_feature(void)
+{
+	/* Enable region 7 identity map optimizations in Xen */
+	struct xen_ia64_opt_feature optf;
+
+	optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7;
+	optf.on = XEN_IA64_OPTF_ON;
+	optf.pgprot = pgprot_val(PAGE_KERNEL);
+	optf.key = 0;	/* No key on linux. */
+	HYPERVISOR_opt_feature(&optf);
+}
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
new file mode 100644
index 00000000000..af93aadb68b
--- /dev/null
+++ b/arch/ia64/xen/irq_xen.c
@@ -0,0 +1,435 @@
+/******************************************************************************
+ * arch/ia64/xen/irq_xen.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/cpu.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/callback.h>
+#include <xen/events.h>
+
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+static int
+xen_assign_irq_vector(int irq)
+{
+	struct physdev_irq irq_op;
+
+	irq_op.irq = irq;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+		return -ENOSPC;
+
+	return irq_op.vector;
+}
+
+/* Release a device vector back to Xen; other vectors are ignored. */
+static void
+xen_free_irq_vector(int vector)
+{
+	struct physdev_irq irq_op;
+
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return;
+
+	irq_op.vector = vector;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op))
+		printk(KERN_WARNING "%s: xen_free_irq_vector fail vector=%d\n",
+		       __func__, vector);
+}
+
+static DEFINE_PER_CPU(int, timer_irq) = -1;
+static DEFINE_PER_CPU(int, ipi_irq) = -1;
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, cmc_irq) = -1;
+static DEFINE_PER_CPU(int, cmcp_irq) = -1;
+static DEFINE_PER_CPU(int, cpep_irq) = -1;
+#define NAME_SIZE	15
+static DEFINE_PER_CPU(char[NAME_SIZE], timer_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], ipi_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], resched_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], cmc_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], cmcp_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], cpep_name);
+#undef NAME_SIZE
+
+struct saved_irq {
+	unsigned int irq;
+	struct irqaction *action;
+};
+/* 16 should be a more than sufficient value, since only several percpu
+ * irqs are registered early.
+ */
+#define MAX_LATE_IRQ	16
+static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
+static unsigned short late_irq_cnt;
+static unsigned short saved_irq_cnt;
+static int xen_slab_ready;
+
+#ifdef CONFIG_SMP
+/* Dummy stub that just reports the interrupt as handled.  We could check
+ * for XEN_RESCHEDULE_VECTOR before __do_IRQ instead, but that would issue
+ * several memory accesses to percpu data and add traffic to other paths.
+ */
+static irqreturn_t
+xen_dummy_handler(int irq, void *dev_id)
+{
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction xen_ipi_irqaction = {
+	.handler =	handle_IPI,
+	.flags =	IRQF_DISABLED,
+	.name =		"IPI"
+};
+
+static struct irqaction xen_resched_irqaction = {
+	.handler =	xen_dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"resched"
+};
+
+static struct irqaction xen_tlb_irqaction = {
+	.handler =	xen_dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"tlb_flush"
+};
+#endif
+
+/*
+ * This is the xen version of percpu irq registration, which needs to
+ * bind to the xen specific evtchn sub-system. One trick here is that the
+ * xen evtchn binding interface depends on kmalloc, because the related
+ * port needs to be freed at device/cpu down. So we cache the
+ * registrations on the BSP before slab is ready and then deal with them
+ * at a later point. For the remaining instances, which happen after slab
+ * is ready, we hook them to xen evtchn immediately.
+ *
+ * FIXME: MCA is not supported so far, and thus the "nomca" boot param is
+ * required.
+ */
+static void
+__xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
+			struct irqaction *action, int save)
+{
+	irq_desc_t *desc;
+	int irq = 0;
+
+	if (xen_slab_ready) {
+		switch (vec) {
+		case IA64_TIMER_VECTOR:
+			snprintf(per_cpu(timer_name, cpu),
+				 sizeof(per_cpu(timer_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
+				action->handler, action->flags,
+				per_cpu(timer_name, cpu), action->dev_id);
+			per_cpu(timer_irq, cpu) = irq;
+			break;
+		case IA64_IPI_RESCHEDULE:
+			snprintf(per_cpu(resched_name, cpu),
+				 sizeof(per_cpu(resched_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu,
+				action->handler, action->flags,
+				per_cpu(resched_name, cpu), action->dev_id);
+			per_cpu(resched_irq, cpu) = irq;
+			break;
+		case IA64_IPI_VECTOR:
+			snprintf(per_cpu(ipi_name, cpu),
+				 sizeof(per_cpu(ipi_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu,
+				action->handler, action->flags,
+				per_cpu(ipi_name, cpu), action->dev_id);
+			per_cpu(ipi_irq, cpu) = irq;
+			break;
+		case IA64_CMC_VECTOR:
+			snprintf(per_cpu(cmc_name, cpu),
+				 sizeof(per_cpu(cmc_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
+						      action->handler,
+						      action->flags,
+						      per_cpu(cmc_name, cpu),
+						      action->dev_id);
+			per_cpu(cmc_irq, cpu) = irq;
+			break;
+		case IA64_CMCP_VECTOR:
+			snprintf(per_cpu(cmcp_name, cpu),
+				 sizeof(per_cpu(cmcp_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu,
+						     action->handler,
+						     action->flags,
+						     per_cpu(cmcp_name, cpu),
+						     action->dev_id);
+			per_cpu(cmcp_irq, cpu) = irq;
+			break;
+		case IA64_CPEP_VECTOR:
+			snprintf(per_cpu(cpep_name, cpu),
+				 sizeof(per_cpu(cpep_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu,
+						     action->handler,
+						     action->flags,
+						     per_cpu(cpep_name, cpu),
+						     action->dev_id);
+			per_cpu(cpep_irq, cpu) = irq;
+			break;
+		case IA64_CPE_VECTOR:
+		case IA64_MCA_RENDEZ_VECTOR:
+		case IA64_PERFMON_VECTOR:
+		case IA64_MCA_WAKEUP_VECTOR:
+		case IA64_SPURIOUS_INT_VECTOR:
+			/* No need to complain, these aren't supported. */
+			break;
+		default:
+			printk(KERN_WARNING "Percpu irq %d is unsupported "
+			       "by xen!\n", vec);
+			break;
+		}
+		BUG_ON(irq < 0);
+
+		if (irq > 0) {
+			/*
+			 * Mark percpu.  Without this, migrate_irqs() will
+			 * mark the interrupt for migrations and trigger it
+			 * on cpu hotplug.
+			 */
+			desc = irq_desc + irq;
+			desc->status |= IRQ_PER_CPU;
+		}
+	}
+
+	/* For BSP, we cache registered percpu irqs, and then re-walk
+	 * them when initializing APs
+	 */
+	if (!cpu && save) {
+		BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
+		saved_percpu_irqs[saved_irq_cnt].irq = vec;
+		saved_percpu_irqs[saved_irq_cnt].action = action;
+		saved_irq_cnt++;
+		if (!xen_slab_ready)
+			late_irq_cnt++;
+	}
+}
+
+static void
+xen_register_percpu_irq(ia64_vector vec, struct irqaction *action)
+{
+	__xen_register_percpu_irq(smp_processor_id(), vec, action, 1);
+}
+
+static void
+xen_bind_early_percpu_irq(void)
+{
+	int i;
+
+	xen_slab_ready = 1;
+	/* Bind the percpu irqs that were cached before slab was ready.  No
+	 * race on the cached array, since only the BSP goes through here.
+	 */
+	for (i = 0; i < late_irq_cnt; i++)
+		__xen_register_percpu_irq(smp_processor_id(),
+					  saved_percpu_irqs[i].irq,
+					  saved_percpu_irqs[i].action, 0);
+}
+
+/* FIXME: There's no obvious point to check whether slab is ready. So
+ * a hack is used here by utilizing a late time hook.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit
+unbind_evtchn_callback(struct notifier_block *nfb,
+		       unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (action == CPU_DEAD) {
+		/* Unregister evtchn.  */
+		if (per_cpu(cpep_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL);
+			per_cpu(cpep_irq, cpu) = -1;
+		}
+		if (per_cpu(cmcp_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL);
+			per_cpu(cmcp_irq, cpu) = -1;
+		}
+		if (per_cpu(cmc_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL);
+			per_cpu(cmc_irq, cpu) = -1;
+		}
+		if (per_cpu(ipi_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(ipi_irq, cpu), NULL);
+			per_cpu(ipi_irq, cpu) = -1;
+		}
+		if (per_cpu(resched_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(resched_irq, cpu),
+						NULL);
+			per_cpu(resched_irq, cpu) = -1;
+		}
+		if (per_cpu(timer_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
+			per_cpu(timer_irq, cpu) = -1;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block unbind_evtchn_notifier = {
+	.notifier_call = unbind_evtchn_callback,
+	.priority = 0
+};
+#endif
+
+void xen_smp_intr_init_early(unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+	unsigned int i;
+
+	for (i = 0; i < saved_irq_cnt; i++)
+		__xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq,
+					  saved_percpu_irqs[i].action, 0);
+#endif
+}
+
+void xen_smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+	unsigned int cpu = smp_processor_id();
+	struct callback_register event = {
+		.type = CALLBACKTYPE_event,
+		.address = { .ip = (unsigned long)&xen_event_callback },
+	};
+
+	if (cpu == 0) {
+		/* Initialization was already done for boot cpu.  */
+#ifdef CONFIG_HOTPLUG_CPU
+		/* Register the notifier only once.  */
+		register_cpu_notifier(&unbind_evtchn_notifier);
+#endif
+		return;
+	}
+
+	/* This should be piggyback when setup vcpu guest context */
+	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+#endif /* CONFIG_SMP */
+}
+
+void __init
+xen_irq_init(void)
+{
+	struct callback_register event = {
+		.type = CALLBACKTYPE_event,
+		.address = { .ip = (unsigned long)&xen_event_callback },
+	};
+
+	xen_init_IRQ();
+	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+	late_time_init = xen_bind_early_percpu_irq;
+}
+
+void
+xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect)
+{
+#ifdef CONFIG_SMP
+	/* TODO: we need to call vcpu_up here */
+	if (unlikely(vector == ap_wakeup_vector)) {
+		/* XXX
+		 * This should be in __cpu_up(cpu) in ia64 smpboot.c
+		 * like x86. But don't want to modify it,
+		 * keep it untouched.
+		 */
+		xen_smp_intr_init_early(cpu);
+
+		xen_send_ipi(cpu, vector);
+		/* vcpu_prepare_and_up(cpu); */
+		return;
+	}
+#endif
+
+	switch (vector) {
+	case IA64_IPI_VECTOR:
+		xen_send_IPI_one(cpu, XEN_IPI_VECTOR);
+		break;
+	case IA64_IPI_RESCHEDULE:
+		xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
+		break;
+	case IA64_CMCP_VECTOR:
+		xen_send_IPI_one(cpu, XEN_CMCP_VECTOR);
+		break;
+	case IA64_CPEP_VECTOR:
+		xen_send_IPI_one(cpu, XEN_CPEP_VECTOR);
+		break;
+	case IA64_TIMER_VECTOR: {
+		/* this is used only once by check_sal_cache_flush()
+		   at boot time */
+		static int used = 0;
+		if (!used) {
+			xen_send_ipi(cpu, IA64_TIMER_VECTOR);
+			used = 1;
+			break;
+		}
+		/* fallthrough */
+	}
+	default:
+		printk(KERN_WARNING "Unsupported IPI type 0x%x\n",
+		       vector);
+		notify_remote_via_irq(0); /* defaults to 0 irq */
+		break;
+	}
+}
+
+static void __init
+xen_register_ipi(void)
+{
+#ifdef CONFIG_SMP
+	register_percpu_irq(IA64_IPI_VECTOR, &xen_ipi_irqaction);
+	register_percpu_irq(IA64_IPI_RESCHEDULE, &xen_resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &xen_tlb_irqaction);
+#endif
+}
+
+static void
+xen_resend_irq(unsigned int vector)
+{
+	(void)resend_irq_on_evtchn(vector);
+}
+
+const struct pv_irq_ops xen_irq_ops __initdata = {
+	.register_ipi = xen_register_ipi,
+
+	.assign_irq_vector = xen_assign_irq_vector,
+	.free_irq_vector = xen_free_irq_vector,
+	.register_percpu_irq = xen_register_percpu_irq,
+
+	.resend_irq = xen_resend_irq,
+};
diff --git a/arch/ia64/xen/irq_xen.h b/arch/ia64/xen/irq_xen.h
new file mode 100644
index 00000000000..26110f330c8
--- /dev/null
+++ b/arch/ia64/xen/irq_xen.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * arch/ia64/xen/irq_xen.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef IRQ_XEN_H
+#define IRQ_XEN_H
+
+extern void (*late_time_init)(void);
+extern char xen_event_callback;
+void __init xen_init_IRQ(void);
+
+extern const struct pv_irq_ops xen_irq_ops __initdata;
+extern void xen_smp_intr_init(void);
+extern void xen_send_ipi(int cpu, int vec);
+
+#endif /* IRQ_XEN_H */
diff --git a/arch/ia64/xen/machvec.c b/arch/ia64/xen/machvec.c
new file mode 100644
index 00000000000..4ad588a7c27
--- /dev/null
+++ b/arch/ia64/xen/machvec.c
@@ -0,0 +1,4 @@
+#define MACHVEC_PLATFORM_NAME           xen
+#define MACHVEC_PLATFORM_HEADER         <asm/machvec_xen.h>
+#include <asm/machvec_init.h>
+
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
new file mode 100644
index 00000000000..fd66b048c6f
--- /dev/null
+++ b/arch/ia64/xen/suspend.c
@@ -0,0 +1,64 @@
+/******************************************************************************
+ * arch/ia64/xen/suspend.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * suspend/resume
+ */
+
+#include <xen/xen-ops.h>
+#include <asm/xen/hypervisor.h>
+#include "time.h"
+
+void
+xen_mm_pin_all(void)
+{
+	/* nothing */
+}
+
+void
+xen_mm_unpin_all(void)
+{
+	/* nothing */
+}
+
+void xen_pre_device_suspend(void)
+{
+	/* nothing */
+}
+
+void
+xen_pre_suspend(void)
+{
+	/* Nothing to do before suspend. */
+}
+
+void
+xen_post_suspend(int suspend_cancelled)
+{
+	if (suspend_cancelled)
+		return;
+
+	xen_ia64_enable_opt_feature();
+	/* add more if necessary */
+}
+
+void xen_arch_resume(void)
+{
+	xen_timer_resume_on_aps();
+}
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
new file mode 100644
index 00000000000..d15a94c330f
--- /dev/null
+++ b/arch/ia64/xen/time.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * arch/ia64/xen/time.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/posix-timers.h>
+#include <linux/irq.h>
+#include <linux/clocksource.h>
+
+#include <asm/timex.h>
+
+#include <asm/xen/hypervisor.h>
+
+#include <xen/interface/vcpu.h>
+
+#include "../kernel/fsyscall_gtod_data.h"
+
+DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+DEFINE_PER_CPU(unsigned long, processed_stolen_time);
+DEFINE_PER_CPU(unsigned long, processed_blocked_time);
+
+/* taken from i386/kernel/time-xen.c */
+static void xen_init_missing_ticks_accounting(int cpu)
+{
+	struct vcpu_register_runstate_memory_area area;
+	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+	int rc;
+
+	memset(runstate, 0, sizeof(*runstate));
+
+	area.addr.v = runstate;
+	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
+				&area);
+	WARN_ON(rc && rc != -ENOSYS);
+
+	per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
+	per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
+					    + runstate->time[RUNSTATE_offline];
+}
+
+/*
+ * Runstate accounting
+ */
+/* stolen from arch/x86/xen/time.c */
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+	u64 state_time;
+	struct vcpu_runstate_info *state;
+
+	BUG_ON(preemptible());
+
+	state = &__get_cpu_var(runstate);
+
+	/*
+	 * The hypervisor updates the runstate info on the current CPU;
+	 * retry until state_entry_time is stable so *res is consistent.
+	 * NOTE(review): rmb() is stronger than a compiler barrier - confirm.
+	 */
+	do {
+		state_time = state->state_entry_time;
+		rmb();
+		*res = *state;
+		rmb();
+	} while (state->state_entry_time != state_time);
+}
+
+#define NS_PER_TICK (1000000000LL/HZ)
+
+/* Account stolen/blocked ticks; returns how far the next itm was advanced. */
+static unsigned long
+consider_steal_time(unsigned long new_itm)
+{
+	unsigned long stolen, blocked;
+	unsigned long delta_itm = 0, stolentick = 0;
+	int cpu = smp_processor_id();
+	struct vcpu_runstate_info runstate;
+	struct task_struct *p = current;
+
+	get_runstate_snapshot(&runstate);
+
+	/*
+	 * Compute the blocked/stolen time not yet accounted for.  A counter
+	 * that is behind its processed value (vcpu migration effect) would
+	 * give a huge bogus delta, so that case is rejected by using 0.
+	 */
+	blocked = runstate.time[RUNSTATE_blocked];
+	if (time_after_eq(blocked, per_cpu(processed_blocked_time, cpu)))
+		blocked -= per_cpu(processed_blocked_time, cpu);
+	else
+		blocked = 0;
+
+	stolen = runstate.time[RUNSTATE_runnable] +
+		 runstate.time[RUNSTATE_offline];
+	if (time_after_eq(stolen, per_cpu(processed_stolen_time, cpu)))
+		stolen -= per_cpu(processed_stolen_time, cpu);
+	else
+		stolen = 0;
+
+	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
+		stolentick = ia64_get_itc() - new_itm;
+
+	do_div(stolentick, NS_PER_TICK);
+	stolentick++;
+	do_div(stolen, NS_PER_TICK);
+	if (stolen > stolentick)
+		stolen = stolentick;
+
+	stolentick -= stolen;
+	do_div(blocked, NS_PER_TICK);
+	if (blocked > stolentick)
+		blocked = stolentick;
+
+	if (stolen > 0 || blocked > 0) {
+		account_steal_time(NULL, jiffies_to_cputime(stolen));
+		account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked));
+		run_local_timers();
+
+		if (rcu_pending(cpu))
+			rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
+		scheduler_tick();
+		run_posix_cpu_timers(p);
+		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
+
+		if (cpu == time_keeper_id) {
+			write_seqlock(&xtime_lock);
+			do_timer(stolen + blocked);
+			local_cpu_data->itm_next = delta_itm + new_itm;
+			write_sequnlock(&xtime_lock);
+		} else {
+			local_cpu_data->itm_next = delta_itm + new_itm;
+		}
+		per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen;
+		per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked;
+	}
+	return delta_itm;
+}
+
+static int xen_do_steal_accounting(unsigned long *new_itm)
+{
+	/* Fold any stolen/blocked ticks into the caller's next itm. */
+	unsigned long advance = consider_steal_time(*new_itm);
+
+	*new_itm += advance;
+	if (!time_after(*new_itm, ia64_get_itc()))
+		return 0;
+	return advance ? 1 : 0;
+}
+
+static void xen_itc_jitter_data_reset(void)
+{
+	u64 lcycle, ret;
+
+	do {
+		lcycle = itc_jitter_data.itc_lastcycle;
+		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
+	} while (unlikely(ret != lcycle));
+}
+
+struct pv_time_ops xen_time_ops __initdata = {
+	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
+	.do_steal_accounting		= xen_do_steal_accounting,
+	.clocksource_resume		= xen_itc_jitter_data_reset,
+};
+
+/* Called after suspend, to resume time.  */
+static void xen_local_tick_resume(void)
+{
+	/* Just trigger a tick.  */
+	ia64_cpu_local_tick();
+	touch_softlockup_watchdog();
+}
+
+void
+xen_timer_resume(void)
+{
+	unsigned int cpu;
+
+	xen_local_tick_resume();
+
+	for_each_online_cpu(cpu)
+		xen_init_missing_ticks_accounting(cpu);
+}
+
+static void ia64_cpu_local_tick_fn(void *unused)
+{
+	xen_local_tick_resume();
+	xen_init_missing_ticks_accounting(smp_processor_id());
+}
+
+void
+xen_timer_resume_on_aps(void)
+{
+	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
+}
diff --git a/arch/ia64/xen/time.h b/arch/ia64/xen/time.h
new file mode 100644
index 00000000000..f98d7e1a42f
--- /dev/null
+++ b/arch/ia64/xen/time.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * arch/ia64/xen/time.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+extern struct pv_time_ops xen_time_ops __initdata;
+void xen_timer_resume_on_aps(void);
diff --git a/arch/ia64/xen/xcom_hcall.c b/arch/ia64/xen/xcom_hcall.c
new file mode 100644
index 00000000000..ccaf7431f7c
--- /dev/null
+++ b/arch/ia64/xen/xcom_hcall.c
@@ -0,0 +1,441 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ *          Tristan Gingold <tristan.gingold@bull.net>
+ *
+ *          Copyright (c) 2007
+ *          Isaku Yamahata <yamahata at valinux co jp>
+ *                          VA Linux Systems Japan K.K.
+ *          consolidate mini and inline version.
+ */
+
+#include <linux/module.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/callback.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/xencomm.h>
+
+/* Xencomm notes:
+ * This file defines hypercalls to be used by xencomm.  The hypercalls simply
+ * create inlines or mini descriptors for pointers and then call the raw arch
+ * hypercall xencomm_arch_hypercall_XXX
+ *
+ * If the arch wants to directly use these hypercalls, simply define macros
+ * in asm/xen/hypercall.h, eg:
+ *  #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+ *
+ * The arch may also define HYPERVISOR_xxx as a function and do more operations
+ * before/after doing the hypercall.
+ *
+ * Note: because only inline or mini descriptors are created, these functions
+ * must only be called with in-kernel memory parameters.
+ */
+
+int
+xencomm_hypercall_console_io(int cmd, int count, char *str)
+{
+	/* xen early printk uses console io hypercall before
+	 * xencomm initialization. In that case, we just ignore it.
+	 */
+	if (!xencomm_is_initialized())
+		return 0;
+
+	return xencomm_arch_hypercall_console_io
+		(cmd, count, xencomm_map_no_alloc(str, count));
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_console_io);
+
+int
+xencomm_hypercall_event_channel_op(int cmd, void *op)
+{
+	struct xencomm_handle *desc;
+	desc = xencomm_map_no_alloc(op, sizeof(struct evtchn_op));
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_event_channel_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_event_channel_op);
+
+int
+xencomm_hypercall_xen_version(int cmd, void *arg)
+{
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case XENVER_version:
+		/* do not actually pass an argument */
+		return xencomm_arch_hypercall_xen_version(cmd, 0);
+	case XENVER_extraversion:
+		argsize = sizeof(struct xen_extraversion);
+		break;
+	case XENVER_compile_info:
+		argsize = sizeof(struct xen_compile_info);
+		break;
+	case XENVER_capabilities:
+		argsize = sizeof(struct xen_capabilities_info);
+		break;
+	case XENVER_changeset:
+		argsize = sizeof(struct xen_changeset_info);
+		break;
+	case XENVER_platform_parameters:
+		argsize = sizeof(struct xen_platform_parameters);
+		break;
+	case XENVER_get_features:
+		argsize = (arg == NULL) ? 0 : sizeof(struct xen_feature_info);
+		break;
+
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown version op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_xen_version(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_xen_version);
+
+int
+xencomm_hypercall_physdev_op(int cmd, void *op)
+{
+	unsigned int argsize;
+
+	switch (cmd) {
+	case PHYSDEVOP_apic_read:
+	case PHYSDEVOP_apic_write:
+		argsize = sizeof(struct physdev_apic);
+		break;
+	case PHYSDEVOP_alloc_irq_vector:
+	case PHYSDEVOP_free_irq_vector:
+		argsize = sizeof(struct physdev_irq);
+		break;
+	case PHYSDEVOP_irq_status_query:
+		argsize = sizeof(struct physdev_irq_status_query);
+		break;
+
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown physdev op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_physdev_op
+		(cmd, xencomm_map_no_alloc(op, argsize));
+}
+
+static int
+xencommize_grant_table_op(struct xencomm_mini **xc_area,
+			  unsigned int cmd, void *op, unsigned int count,
+			  struct xencomm_handle **desc)
+{
+	struct xencomm_handle *desc1;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case GNTTABOP_map_grant_ref:
+		argsize = sizeof(struct gnttab_map_grant_ref);
+		break;
+	case GNTTABOP_unmap_grant_ref:
+		argsize = sizeof(struct gnttab_unmap_grant_ref);
+		break;
+	case GNTTABOP_setup_table:
+	{
+		struct gnttab_setup_table *setup = op;
+
+		argsize = sizeof(*setup);
+
+		if (count != 1)
+			return -EINVAL;
+		desc1 = __xencomm_map_no_alloc
+			(xen_guest_handle(setup->frame_list),
+			 setup->nr_frames *
+			 sizeof(*xen_guest_handle(setup->frame_list)),
+			 *xc_area);
+		if (desc1 == NULL)
+			return -EINVAL;
+		(*xc_area)++;
+		set_xen_guest_handle(setup->frame_list, (void *)desc1);
+		break;
+	}
+	case GNTTABOP_dump_table:
+		argsize = sizeof(struct gnttab_dump_table);
+		break;
+	case GNTTABOP_transfer:
+		argsize = sizeof(struct gnttab_transfer);
+		break;
+	case GNTTABOP_copy:
+		argsize = sizeof(struct gnttab_copy);
+		break;
+	case GNTTABOP_query_size:
+		argsize = sizeof(struct gnttab_query_size);
+		break;
+	default:
+		printk(KERN_DEBUG "%s: unknown hypercall grant table op %d\n",
+		       __func__, cmd);
+		BUG();
+	}
+
+	*desc = __xencomm_map_no_alloc(op, count * argsize, *xc_area);
+	if (*desc == NULL)
+		return -EINVAL;
+	(*xc_area)++;
+
+	return 0;
+}
+
+int
+xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
+				 unsigned int count)
+{
+	int rc;
+	struct xencomm_handle *desc;
+	XENCOMM_MINI_ALIGNED(xc_area, 2);
+
+	rc = xencommize_grant_table_op(&xc_area, cmd, op, count, &desc);
+	if (rc)
+		return rc;
+
+	return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_grant_table_op);
+
+int
+xencomm_hypercall_sched_op(int cmd, void *arg)
+{
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case SCHEDOP_yield:
+	case SCHEDOP_block:
+		argsize = 0;
+		break;
+	case SCHEDOP_shutdown:
+		argsize = sizeof(struct sched_shutdown);
+		break;
+	case SCHEDOP_poll:
+	{
+		struct sched_poll *poll = arg;
+		struct xencomm_handle *ports;
+
+		argsize = sizeof(struct sched_poll);
+		/* Map the whole port array, not just its first entry.  */
+		ports = xencomm_map_no_alloc(xen_guest_handle(poll->ports),
+		    poll->nr_ports * sizeof(*xen_guest_handle(poll->ports)));
+		set_xen_guest_handle(poll->ports, (void *)ports);
+		break;
+	}
+	default:
+		printk(KERN_DEBUG "%s: unknown sched op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_sched_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_sched_op);
+
+int
+xencomm_hypercall_multicall(void *call_list, int nr_calls)
+{
+	int rc;
+	int i;
+	struct multicall_entry *mce;
+	struct xencomm_handle *desc;
+	XENCOMM_MINI_ALIGNED(xc_area, nr_calls * 2);
+
+	for (i = 0; i < nr_calls; i++) {
+		mce = (struct multicall_entry *)call_list + i;
+
+		switch (mce->op) {
+		case __HYPERVISOR_update_va_mapping:
+		case __HYPERVISOR_mmu_update:
+			/* No-op on ia64.  */
+			break;
+		case __HYPERVISOR_grant_table_op:
+			rc = xencommize_grant_table_op
+				(&xc_area,
+				 mce->args[0], (void *)mce->args[1],
+				 mce->args[2], &desc);
+			if (rc)
+				return rc;
+			mce->args[1] = (unsigned long)desc;
+			break;
+		case __HYPERVISOR_memory_op:
+		default:
+			printk(KERN_DEBUG
+			       "%s: unhandled multicall op entry op %lu\n",
+			       __func__, mce->op);
+			return -ENOSYS;
+		}
+	}
+
+	desc = xencomm_map_no_alloc(call_list,
+				    nr_calls * sizeof(struct multicall_entry));
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_multicall(desc, nr_calls);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_multicall);
+
+int
+xencomm_hypercall_callback_op(int cmd, void *arg)
+{
+	unsigned int argsize;
+	switch (cmd) {
+	case CALLBACKOP_register:
+		argsize = sizeof(struct callback_register);
+		break;
+	case CALLBACKOP_unregister:
+		argsize = sizeof(struct callback_unregister);
+		break;
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown callback op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_callback_op
+		(cmd, xencomm_map_no_alloc(arg, argsize));
+}
+
+static int
+xencommize_memory_reservation(struct xencomm_mini *xc_area,
+			      struct xen_memory_reservation *mop)
+{
+	struct xencomm_handle *desc;
+
+	desc = __xencomm_map_no_alloc(xen_guest_handle(mop->extent_start),
+			mop->nr_extents *
+			sizeof(*xen_guest_handle(mop->extent_start)),
+			xc_area);
+	if (desc == NULL)
+		return -EINVAL;
+
+	set_xen_guest_handle(mop->extent_start, (void *)desc);
+	return 0;
+}
+
+int
+xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
+{
+	GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} };
+	struct xen_memory_reservation *xmr = NULL;
+	int rc;
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+	XENCOMM_MINI_ALIGNED(xc_area, 2);
+
+	switch (cmd) {
+	case XENMEM_increase_reservation:
+	case XENMEM_decrease_reservation:
+	case XENMEM_populate_physmap:
+		xmr = (struct xen_memory_reservation *)arg;
+		set_xen_guest_handle(extent_start_va[0],
+				     xen_guest_handle(xmr->extent_start));
+
+		argsize = sizeof(*xmr);
+		rc = xencommize_memory_reservation(xc_area, xmr);
+		if (rc)
+			return rc;
+		xc_area++;
+		break;
+
+	case XENMEM_maximum_ram_page:
+		argsize = 0;
+		break;
+
+	case XENMEM_add_to_physmap:
+		argsize = sizeof(struct xen_add_to_physmap);
+		break;
+
+	default:
+		printk(KERN_DEBUG "%s: unknown memory op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	rc = xencomm_arch_hypercall_memory_op(cmd, desc);
+
+	switch (cmd) {
+	case XENMEM_increase_reservation:
+	case XENMEM_decrease_reservation:
+	case XENMEM_populate_physmap:
+		set_xen_guest_handle(xmr->extent_start,
+				     xen_guest_handle(extent_start_va[0]));
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_memory_op);
+
+int
+xencomm_hypercall_suspend(unsigned long srec)
+{
+	struct sched_shutdown arg;
+
+	arg.reason = SHUTDOWN_suspend;
+
+	return xencomm_arch_hypercall_sched_op(
+		SCHEDOP_shutdown, xencomm_map_no_alloc(&arg, sizeof(arg)));
+}
+
+/* vcpu_op wrapper; only VCPUOP_register_runstate_memory_area is supported.  */
+long
+xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	unsigned int argsize;
+	switch (cmd) {
+	case VCPUOP_register_runstate_memory_area: {
+		struct vcpu_register_runstate_memory_area *area = arg;
+		argsize = sizeof(*area);	/* arg is void *: sizeof(*arg) is 1 */
+		set_xen_guest_handle(area->addr.h,
+		     (void *)xencomm_map_no_alloc(area->addr.v,
+						  sizeof(area->addr.v)));
+		break;
+	}
+
+	default:
+		printk(KERN_DEBUG "%s: unknown vcpu op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
+					xencomm_map_no_alloc(arg, argsize));
+}
+
+long
+xencomm_hypercall_opt_feature(void *arg)
+{
+	return xencomm_arch_hypercall_opt_feature(
+		xencomm_map_no_alloc(arg,
+				     sizeof(struct xen_ia64_opt_feature)));
+}
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
new file mode 100644
index 00000000000..04cd1235045
--- /dev/null
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -0,0 +1,364 @@
+/******************************************************************************
+ * arch/ia64/xen/xen_pv_ops.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pm.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/xencomm.h>
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+#include "time.h"
+
+/***************************************************************************
+ * general info
+ */
+static struct pv_info xen_info __initdata = {
+	.kernel_rpl = 2,	/* or 1: determined at runtime */
+	.paravirt_enabled = 1,
+	.name = "Xen/ia64",
+};
+
+#define IA64_RSC_PL_SHIFT	2
+#define IA64_RSC_PL_BIT_SIZE	2
+#define IA64_RSC_PL_MASK	\
+	(((1UL << IA64_RSC_PL_BIT_SIZE) - 1) << IA64_RSC_PL_SHIFT)
+
+static void __init
+xen_info_init(void)
+{
+	/* Xenified Linux/ia64 may run at pl = 1 or 2, so take the level
+	 * from the pl field of ar.rsc at run time. */
+	unsigned long rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	unsigned int rpl = (rsc & IA64_RSC_PL_MASK) >> IA64_RSC_PL_SHIFT;
+	xen_info.kernel_rpl = rpl;
+}
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void
+xen_panic_hypercall(struct unw_frame_info *info, void *arg)
+{
+	current->thread.ksp = (__u64)info->sw - 16;
+	HYPERVISOR_shutdown(SHUTDOWN_crash);
+	/* we're never actually going to get here... */
+}
+
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	unw_init_running(xen_panic_hypercall, NULL);
+	/* we're never actually going to get here... */
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+	.notifier_call = xen_panic_event, .priority = 0, /* try to go last */
+};
+
+static void xen_pm_power_off(void)
+{
+	local_irq_disable();
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+static void __init
+xen_banner(void)
+{
+	printk(KERN_INFO
+	       "Running on Xen! pl = %d start_info_pfn=0x%lx nr_pages=%ld "
+	       "flags=0x%x\n",
+	       xen_info.kernel_rpl,
+	       HYPERVISOR_shared_info->arch.start_info_pfn,
+	       xen_start_info->nr_pages, xen_start_info->flags);
+}
+
+static int __init
+xen_reserve_memory(struct rsvd_region *region)
+{
+	region->start = (unsigned long)__va(
+		(HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT));
+	region->end   = region->start + PAGE_SIZE;
+	return 1;
+}
+
+static void __init
+xen_arch_setup_early(void)
+{
+	struct shared_info *s;
+	BUG_ON(!xen_pv_domain());
+
+	s = HYPERVISOR_shared_info;
+	xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
+
+	/* Must be done before any hypercall.  */
+	xencomm_initialize();
+
+	xen_setup_features();
+	/* Register a call for panic conditions. */
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &xen_panic_block);
+	pm_power_off = xen_pm_power_off;
+
+	xen_ia64_enable_opt_feature();
+}
+
+static void __init
+xen_arch_setup_console(char **cmdline_p)
+{
+	add_preferred_console("xenboot", 0, NULL);
+	add_preferred_console("tty", 0, NULL);
+	/* use hvc_xen */
+	add_preferred_console("hvc", 0, NULL);
+
+#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = NULL;
+#endif
+}
+
+static int __init
+xen_arch_setup_nomca(void)
+{
+	return 1;
+}
+
+static void __init
+xen_post_smp_prepare_boot_cpu(void)
+{
+	xen_setup_vcpu_info_placement();
+}
+
+static const struct pv_init_ops xen_init_ops __initdata = {
+	.banner = xen_banner,
+
+	.reserve_memory = xen_reserve_memory,
+
+	.arch_setup_early = xen_arch_setup_early,
+	.arch_setup_console = xen_arch_setup_console,
+	.arch_setup_nomca = xen_arch_setup_nomca,
+
+	.post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+static void xen_setreg(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:
+		xen_set_kr(regnum - _IA64_REG_AR_KR0, val);
+		break;
+#ifdef CONFIG_IA32_SUPPORT
+	case _IA64_REG_AR_EFLAG:
+		xen_set_eflag(val);
+		break;
+#endif
+	case _IA64_REG_CR_TPR:
+		xen_set_tpr(val);
+		break;
+	case _IA64_REG_CR_ITM:
+		xen_set_itm(val);
+		break;
+	case _IA64_REG_CR_EOI:
+		xen_eoi(val);
+		break;
+	default:
+		ia64_native_setreg_func(regnum, val);
+		break;
+	}
+}
+
+static unsigned long xen_getreg(int regnum)
+{
+	unsigned long res;
+
+	switch (regnum) {
+	case _IA64_REG_PSR:
+		res = xen_get_psr();
+		break;
+#ifdef CONFIG_IA32_SUPPORT
+	case _IA64_REG_AR_EFLAG:
+		res = xen_get_eflag();
+		break;
+#endif
+	case _IA64_REG_CR_IVR:
+		res = xen_get_ivr();
+		break;
+	case _IA64_REG_CR_TPR:
+		res = xen_get_tpr();
+		break;
+	default:
+		res = ia64_native_getreg_func(regnum);
+		break;
+	}
+	return res;
+}
+
+/* turning on interrupts is a bit more complicated.. write to the
+ * memory-mapped virtual psr.i bit first (to avoid race condition),
+ * then if any interrupts were pending, we have to execute a hyperprivop
+ * to ensure the pending interrupt gets delivered; else we're done! */
+static void
+xen_ssm_i(void)
+{
+	int old = xen_get_virtual_psr_i();
+	xen_set_virtual_psr_i(1);
+	barrier();
+	if (!old && xen_get_virtual_pend())
+		xen_hyper_ssm_i();
+}
+
+/* turning off interrupts can be paravirtualized simply by writing
+ * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */
+static void
+xen_rsm_i(void)
+{
+	xen_set_virtual_psr_i(0);
+	barrier();
+}
+
+static unsigned long
+xen_get_psr_i(void)
+{
+	return xen_get_virtual_psr_i() ? IA64_PSR_I : 0;
+}
+
+static void
+xen_intrin_local_irq_restore(unsigned long mask)
+{
+	if (mask & IA64_PSR_I)
+		xen_ssm_i();
+	else
+		xen_rsm_i();
+}
+
+static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+	.fc		= xen_fc,
+	.thash		= xen_thash,
+	.get_cpuid	= xen_get_cpuid,
+	.get_pmd	= xen_get_pmd,
+	.getreg		= xen_getreg,
+	.setreg		= xen_setreg,
+	.ptcga		= xen_ptcga,
+	.get_rr		= xen_get_rr,
+	.set_rr		= xen_set_rr,
+	.set_rr0_to_rr4	= xen_set_rr0_to_rr4,
+	.ssm_i		= xen_ssm_i,
+	.rsm_i		= xen_rsm_i,
+	.get_psr_i	= xen_get_psr_i,
+	.intrin_local_irq_restore
+			= xen_intrin_local_irq_restore,
+};
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+
+extern char xen_switch_to;
+extern char xen_leave_syscall;
+extern char xen_work_processed_syscall;
+extern char xen_leave_kernel;
+
+const struct pv_cpu_asm_switch xen_cpu_asm_switch = {
+	.switch_to		= (unsigned long)&xen_switch_to,
+	.leave_syscall		= (unsigned long)&xen_leave_syscall,
+	.work_processed_syscall	= (unsigned long)&xen_work_processed_syscall,
+	.leave_kernel		= (unsigned long)&xen_leave_kernel,
+};
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+static void
+xen_pcat_compat_init(void)
+{
+	/* nothing */
+}
+
+static struct irq_chip*
+xen_iosapic_get_irq_chip(unsigned long trigger)
+{
+	return NULL;
+}
+
+static unsigned int
+xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	struct physdev_apic apic_op;
+	int ret;
+
+	apic_op.apic_physbase = (unsigned long)iosapic -
+					__IA64_UNCACHED_OFFSET;
+	apic_op.reg = reg;
+	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+	if (ret)
+		return ret;
+	return apic_op.value;
+}
+
+static void
+xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	struct physdev_apic apic_op;
+
+	apic_op.apic_physbase = (unsigned long)iosapic -
+					__IA64_UNCACHED_OFFSET;
+	apic_op.reg = reg;
+	apic_op.value = val;
+	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+}
+
+static const struct pv_iosapic_ops xen_iosapic_ops __initdata = {
+	.pcat_compat_init = xen_pcat_compat_init,
+	.__get_irq_chip = xen_iosapic_get_irq_chip,
+
+	.__read = xen_iosapic_read,
+	.__write = xen_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_ops initialization
+ */
+
+void __init
+xen_setup_pv_ops(void)
+{
+	xen_info_init();
+	pv_info = xen_info;
+	pv_init_ops = xen_init_ops;
+	pv_cpu_ops = xen_cpu_ops;
+	pv_iosapic_ops = xen_iosapic_ops;
+	pv_irq_ops = xen_irq_ops;
+	pv_time_ops = xen_time_ops;
+
+	paravirt_cpu_asm_init(&xen_cpu_asm_switch);
+}
diff --git a/arch/ia64/xen/xencomm.c b/arch/ia64/xen/xencomm.c
new file mode 100644
index 00000000000..1f5d7ac82e9
--- /dev/null
+++ b/arch/ia64/xen/xencomm.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/mm.h>
+
+static unsigned long kernel_virtual_offset;
+static int is_xencomm_initialized;
+
+/* for xen early printk. It uses console io hypercall which uses xencomm.
+ * However early printk may use it before xencomm initialization.
+ */
+int
+xencomm_is_initialized(void)
+{
+	return is_xencomm_initialized;
+}
+
+void
+xencomm_initialize(void)
+{
+	kernel_virtual_offset = KERNEL_START - ia64_tpa(KERNEL_START);
+	is_xencomm_initialized = 1;
+}
+
+/* Translate vaddr to a physical address: 0 for NULL, ~0UL on lookup failure. */
+unsigned long
+xencomm_vtop(unsigned long vaddr)
+{
+	struct page *page;
+	struct vm_area_struct *vma;
+
+	if (vaddr == 0)
+		return 0UL;
+
+	if (REGION_NUMBER(vaddr) == 5) {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *ptep;
+
+		/* On ia64, TASK_SIZE refers to current.  It is not initialized
+		   during boot.
+		   Furthermore the kernel is relocatable and __pa() doesn't
+		   work on its addresses.  */
+		if (vaddr >= KERNEL_START
+		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
+			return vaddr - kernel_virtual_offset;
+
+		/* In kernel area -- virtually mapped.  */
+		pgd = pgd_offset_k(vaddr);
+		if (pgd_none(*pgd) || pgd_bad(*pgd))
+			return ~0UL;
+
+		pud = pud_offset(pgd, vaddr);
+		if (pud_none(*pud) || pud_bad(*pud))
+			return ~0UL;
+
+		pmd = pmd_offset(pud, vaddr);
+		if (pmd_none(*pmd) || pmd_bad(*pmd))
+			return ~0UL;
+
+		ptep = pte_offset_kernel(pmd, vaddr);
+		if (!ptep)
+			return ~0UL;
+
+		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
+	}
+
+	if (vaddr > TASK_SIZE) {
+		/* percpu variables: return the tpa result, not __pa() */
+		if (REGION_NUMBER(vaddr) == 7 &&
+		    REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
+			return ia64_tpa(vaddr);
+
+		/* kernel address */
+		return __pa(vaddr);
+	}
+
+	/* XXX double-check (lack of) locking */
+	vma = find_extend_vma(current->mm, vaddr);
+	if (!vma)
+		return ~0UL;
+
+	/* We assume the page is modified.  */
+	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
+	if (!page)
+		return ~0UL;
+
+	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+}
diff --git a/arch/ia64/xen/xenivt.S b/arch/ia64/xen/xenivt.S
new file mode 100644
index 00000000000..3e71d50584d
--- /dev/null
+++ b/arch/ia64/xen/xenivt.S
@@ -0,0 +1,52 @@
+/*
+ * arch/ia64/xen/xenivt.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+
+#include "../kernel/minstate.h"
+
+	.section .text,"ax"
+GLOBAL_ENTRY(xen_event_callback)
+	mov r31=pr		// prepare to save predicates
+	;;
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+	;;
+	movl r3=XSI_PSR_IC
+	mov r14=1
+	;;
+	st4 [r3]=r14
+	;;
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
+	SAVE_REST
+	;;
+1:
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	add out0=16,sp		// pass pointer to pt_regs as first arg
+	;;
+	br.call.sptk.many b0=xen_evtchn_do_upcall
+	;;
+	movl r20=XSI_PSR_I_ADDR
+	;;
+	ld8 r20=[r20]
+	;;
+	adds r20=-1,r20		// vcpu_info->evtchn_upcall_pending
+	;;
+	ld1 r20=[r20]
+	;;
+	cmp.ne p6,p0=r20,r0	// if there are pending events,
+	(p6) br.spnt.few 1b	// call evtchn_do_upcall again.
+	br.sptk.many xen_leave_kernel	// we know ia64_leave_kernel is
+					// paravirtualized as xen_leave_kernel
+END(xen_event_callback)
diff --git a/arch/ia64/xen/xensetup.S b/arch/ia64/xen/xensetup.S
new file mode 100644
index 00000000000..28fed1fcc07
--- /dev/null
+++ b/arch/ia64/xen/xensetup.S
@@ -0,0 +1,83 @@
+/*
+ * Support routines for Xen
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/paravirt.h>
+#include <asm/xen/privop.h>
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <xen/interface/elfnote.h>
+
+	.section .data.read_mostly
+	.align 8
+	.global xen_domain_type
+xen_domain_type:
+	data4 XEN_NATIVE_ASM
+	.previous
+
+	__INIT
+ENTRY(startup_xen)
+	// Calculate load offset.
+	// The constant, LOAD_OFFSET, can't be used because the boot
+	// loader doesn't always load to the LMA specified by the vmlinux.lds.
+	mov r9=ip	// must be the first instruction to make sure
+			// that r9 = the physical address of startup_xen.
+			// Usually r9 = startup_xen - LOAD_OFFSET
+	movl r8=startup_xen
+	;;
+	sub r9=r9,r8	// Usually r9 = -LOAD_OFFSET.
+
+	mov r10=PARAVIRT_HYPERVISOR_TYPE_XEN
+	movl r11=_start
+	;;
+	add r11=r11,r9
+	movl r8=hypervisor_type
+	;;
+	add r8=r8,r9
+	mov b0=r11
+	;;
+	st8 [r8]=r10
+	br.cond.sptk.many b0
+	;;
+END(startup_xen)
+
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,	.asciz "linux")
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,	.asciz "2.6")
+	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,	.asciz "xen-3.0")
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,		data8.ua startup_xen - LOAD_OFFSET)
+
+#define isBP	p3	// are we the Bootstrap Processor?
+
+	.text
+
+GLOBAL_ENTRY(xen_setup_hook)
+	mov r8=XEN_PV_DOMAIN_ASM
+(isBP)	movl r9=xen_domain_type;;
+(isBP)	st4 [r9]=r8
+	movl r10=xen_ivt;;
+
+	mov cr.iva=r10
+
+	/* Set xsi base.  */
+#define FW_HYPERCALL_SET_SHARED_INFO_VA			0x600
+(isBP)	mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
+(isBP)	movl r28=XSI_BASE;;
+(isBP)	break 0x1000;;
+
+	/* setup pv_ops */
+(isBP)	mov r4=rp
+	;;
+(isBP)	br.call.sptk.many rp=xen_setup_pv_ops
+	;;
+(isBP)	mov rp=r4
+	;;
+
+	br.ret.sptk.many rp
+	;;
+END(xen_setup_hook)
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 00289c178f8..dbaed4a6381 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -42,6 +42,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f1..39cb6da72dc 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 677c93a490f..836fb66f080 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Platform dependent setup"
 
 config EISA
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index 808c9018b11..c50bec8aabb 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -18,7 +18,6 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
-#include <linux/smp_lock.h>
 #include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 0a8998315e5..76b66feb74d 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -75,6 +75,8 @@ config NO_IOPORT
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 choice
diff --git a/arch/m68knommu/include/asm/thread_info.h b/arch/m68knommu/include/asm/thread_info.h
index 0c9bc095f3f..82529f424ea 100644
--- a/arch/m68knommu/include/asm/thread_info.h
+++ b/arch/m68knommu/include/asm/thread_info.h
@@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b905744d791..5f149b030c0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
 	  add initrd or initramfs image to the kernel image.
 	  Otherwise, say N.
 
+source "kernel/Kconfig.freezer"
+
 menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
 
 config HW_HAS_EISA
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 3965fda94a8..1359c03ded5 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		sec = BCD2BIN(sec);
-		min = BCD2BIN(min);
-		hour = BCD2BIN(hour);
-		day = BCD2BIN(day);
-		mon = BCD2BIN(mon);
-		year = BCD2BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	year += real_year - 72 + 2000;
@@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = BCD2BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = BIN2BCD(real_seconds);
-			real_minutes = BIN2BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
diff --git a/arch/mips/include/asm/mc146818-time.h b/arch/mips/include/asm/mc146818-time.h
index cdc379a0a94..199b45733a9 100644
--- a/arch/mips/include/asm/mc146818-time.h
+++ b/arch/mips/include/asm/mc146818-time.h
@@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
@@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
 	} while (sec != CMOS_READ(RTC_SECONDS));
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	year = mc146818_decode_year(year);
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 6537d90a25b..2d3c0dca275 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
 	/* Stop the update to the time */
 	m48t37_base->control = 0x40;
 
-	year = BCD2BIN(m48t37_base->year);
-	year += BCD2BIN(m48t37_base->century) * 100;
+	year = bcd2bin(m48t37_base->year);
+	year += bcd2bin(m48t37_base->century) * 100;
 
-	month = BCD2BIN(m48t37_base->month);
-	day = BCD2BIN(m48t37_base->date);
-	hour = BCD2BIN(m48t37_base->hour);
-	min = BCD2BIN(m48t37_base->min);
-	sec = BCD2BIN(m48t37_base->sec);
+	month = bcd2bin(m48t37_base->month);
+	day = bcd2bin(m48t37_base->date);
+	hour = bcd2bin(m48t37_base->hour);
+	min = bcd2bin(m48t37_base->min);
+	sec = bcd2bin(m48t37_base->sec);
 
 	/* Start the update to the time again */
 	m48t37_base->control = 0x00;
@@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
 	m48t37_base->control = 0x80;
 
 	/* year */
-	m48t37_base->year = BIN2BCD(tm.tm_year % 100);
-	m48t37_base->century = BIN2BCD(tm.tm_year / 100);
+	m48t37_base->year = bin2bcd(tm.tm_year % 100);
+	m48t37_base->century = bin2bcd(tm.tm_year / 100);
 
 	/* month */
-	m48t37_base->month = BIN2BCD(tm.tm_mon);
+	m48t37_base->month = bin2bcd(tm.tm_mon);
 
 	/* day */
-	m48t37_base->date = BIN2BCD(tm.tm_mday);
+	m48t37_base->date = bin2bcd(tm.tm_mday);
 
 	/* hour/min/sec */
-	m48t37_base->hour = BIN2BCD(tm.tm_hour);
-	m48t37_base->min = BIN2BCD(tm.tm_min);
-	m48t37_base->sec = BIN2BCD(tm.tm_sec);
+	m48t37_base->hour = bin2bcd(tm.tm_hour);
+	m48t37_base->min = bin2bcd(tm.tm_min);
+	m48t37_base->sec = bin2bcd(tm.tm_sec);
 
 	/* day of week -- not really used, but let's keep it up-to-date */
-	m48t37_base->day = BIN2BCD(tm.tm_wday + 1);
+	m48t37_base->day = bin2bcd(tm.tm_wday + 1);
 
 	/* disable writing */
 	m48t37_base->control = 0x00;
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index 26fbff4c15b..b732600b47f 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
 	 */
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	tm.tm_sec = BIN2BCD(tm.tm_sec);
+	tm.tm_sec = bin2bcd(tm.tm_sec);
 	m41t81_write(M41T81REG_SC, tm.tm_sec);
 
-	tm.tm_min = BIN2BCD(tm.tm_min);
+	tm.tm_min = bin2bcd(tm.tm_min);
 	m41t81_write(M41T81REG_MN, tm.tm_min);
 
-	tm.tm_hour = BIN2BCD(tm.tm_hour);
+	tm.tm_hour = bin2bcd(tm.tm_hour);
 	tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
 	m41t81_write(M41T81REG_HR, tm.tm_hour);
 
 	/* tm_wday starts from 0 to 6 */
 	if (tm.tm_wday == 0) tm.tm_wday = 7;
-	tm.tm_wday = BIN2BCD(tm.tm_wday);
+	tm.tm_wday = bin2bcd(tm.tm_wday);
 	m41t81_write(M41T81REG_DY, tm.tm_wday);
 
-	tm.tm_mday = BIN2BCD(tm.tm_mday);
+	tm.tm_mday = bin2bcd(tm.tm_mday);
 	m41t81_write(M41T81REG_DT, tm.tm_mday);
 
 	/* tm_mon starts from 0, *ick* */
 	tm.tm_mon ++;
-	tm.tm_mon = BIN2BCD(tm.tm_mon);
+	tm.tm_mon = bin2bcd(tm.tm_mon);
 	m41t81_write(M41T81REG_MO, tm.tm_mon);
 
 	/* we don't do century, everything is beyond 2000 */
 	tm.tm_year %= 100;
-	tm.tm_year = BIN2BCD(tm.tm_year);
+	tm.tm_year = bin2bcd(tm.tm_year);
 	m41t81_write(M41T81REG_YR, tm.tm_year);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
@@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
 	year = m41t81_read(M41T81REG_YR);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	sec = BCD2BIN(sec);
-	min = BCD2BIN(min);
-	hour = BCD2BIN(hour);
-	day = BCD2BIN(day);
-	mon = BCD2BIN(mon);
-	year = BCD2BIN(year);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
 
 	year += 2000;
 
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index ff3e5dabb34..4438b2195c4 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
 	xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
 
 	/* trivial ones */
-	tm.tm_sec = BIN2BCD(tm.tm_sec);
+	tm.tm_sec = bin2bcd(tm.tm_sec);
 	xicor_write(X1241REG_SC, tm.tm_sec);
 
-	tm.tm_min = BIN2BCD(tm.tm_min);
+	tm.tm_min = bin2bcd(tm.tm_min);
 	xicor_write(X1241REG_MN, tm.tm_min);
 
-	tm.tm_mday = BIN2BCD(tm.tm_mday);
+	tm.tm_mday = bin2bcd(tm.tm_mday);
 	xicor_write(X1241REG_DT, tm.tm_mday);
 
 	/* tm_mon starts from 0, *ick* */
 	tm.tm_mon ++;
-	tm.tm_mon = BIN2BCD(tm.tm_mon);
+	tm.tm_mon = bin2bcd(tm.tm_mon);
 	xicor_write(X1241REG_MO, tm.tm_mon);
 
 	/* year is split */
@@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
 	tmp = xicor_read(X1241REG_HR);
 	if (tmp & X1241REG_HR_MIL) {
 		/* 24 hour format */
-		tm.tm_hour = BIN2BCD(tm.tm_hour);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
 		tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
 	} else {
 		/* 12 hour format, with 0x2 for pm */
@@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
 			tmp |= 0x20;
 			tm.tm_hour -= 12;
 		}
-		tm.tm_hour = BIN2BCD(tm.tm_hour);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
 		tmp |= tm.tm_hour;
 	}
 	xicor_write(X1241REG_HR, tmp);
@@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
 	y2k = xicor_read(X1241REG_Y2K);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	sec = BCD2BIN(sec);
-	min = BCD2BIN(min);
-	hour = BCD2BIN(hour);
-	day = BCD2BIN(day);
-	mon = BCD2BIN(mon);
-	year = BCD2BIN(year);
-	y2k = BCD2BIN(y2k);
+	sec = bcd2bin(sec);
+	min = bcd2bin(min);
+	hour = bcd2bin(hour);
+	day = bcd2bin(day);
+	mon = bcd2bin(mon);
+	year = bcd2bin(year);
+	y2k = bcd2bin(y2k);
 
 	year += (y2k * 100);
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index dd557c9cf00..9a9f4335887 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Matsushita MN10300 system setup"
 
diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
index 042f792d843..7978470b574 100644
--- a/arch/mn10300/kernel/rtc.c
+++ b/arch/mn10300/kernel/rtc.c
@@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8313fccced5..644a70b1b04 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
 	def_bool y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select RTC_CLASS
+	select RTC_DRV_PARISC
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
@@ -90,6 +92,8 @@ config ARCH_MAY_HAVE_PC_FDC
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
diff --git a/include/asm-parisc/Kbuild b/arch/parisc/include/asm/Kbuild
index f88b252e419..f88b252e419 100644
--- a/include/asm-parisc/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
diff --git a/include/asm-parisc/agp.h b/arch/parisc/include/asm/agp.h
index 9651660da63..9651660da63 100644
--- a/include/asm-parisc/agp.h
+++ b/arch/parisc/include/asm/agp.h
diff --git a/include/asm-parisc/asmregs.h b/arch/parisc/include/asm/asmregs.h
index d93c646e188..d93c646e188 100644
--- a/include/asm-parisc/asmregs.h
+++ b/arch/parisc/include/asm/asmregs.h
diff --git a/include/asm-parisc/assembly.h b/arch/parisc/include/asm/assembly.h
index ffb208840ec..ffb208840ec 100644
--- a/include/asm-parisc/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
diff --git a/include/asm-parisc/atomic.h b/arch/parisc/include/asm/atomic.h
index 57fcc4a5ebb..57fcc4a5ebb 100644
--- a/include/asm-parisc/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
diff --git a/include/asm-parisc/auxvec.h b/arch/parisc/include/asm/auxvec.h
index 9c3ac4b89dc..9c3ac4b89dc 100644
--- a/include/asm-parisc/auxvec.h
+++ b/arch/parisc/include/asm/auxvec.h
diff --git a/include/asm-parisc/bitops.h b/arch/parisc/include/asm/bitops.h
index 7a6ea10bd23..7a6ea10bd23 100644
--- a/include/asm-parisc/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
diff --git a/include/asm-parisc/bug.h b/arch/parisc/include/asm/bug.h
index 8cfc553fc83..8cfc553fc83 100644
--- a/include/asm-parisc/bug.h
+++ b/arch/parisc/include/asm/bug.h
diff --git a/include/asm-parisc/bugs.h b/arch/parisc/include/asm/bugs.h
index 9e6284342a5..9e6284342a5 100644
--- a/include/asm-parisc/bugs.h
+++ b/arch/parisc/include/asm/bugs.h
diff --git a/include/asm-parisc/byteorder.h b/arch/parisc/include/asm/byteorder.h
index db148313de5..db148313de5 100644
--- a/include/asm-parisc/byteorder.h
+++ b/arch/parisc/include/asm/byteorder.h
diff --git a/include/asm-parisc/cache.h b/arch/parisc/include/asm/cache.h
index 32c2cca7434..32c2cca7434 100644
--- a/include/asm-parisc/cache.h
+++ b/arch/parisc/include/asm/cache.h
diff --git a/include/asm-parisc/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index b7ca6dc7fdd..b7ca6dc7fdd 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
diff --git a/include/asm-parisc/checksum.h b/arch/parisc/include/asm/checksum.h
index e9639ccc3fc..e9639ccc3fc 100644
--- a/include/asm-parisc/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
diff --git a/include/asm-parisc/compat.h b/arch/parisc/include/asm/compat.h
index 7f32611a7a5..7f32611a7a5 100644
--- a/include/asm-parisc/compat.h
+++ b/arch/parisc/include/asm/compat.h
diff --git a/include/asm-parisc/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h
index 81bec28bdc4..81bec28bdc4 100644
--- a/include/asm-parisc/compat_rt_sigframe.h
+++ b/arch/parisc/include/asm/compat_rt_sigframe.h
diff --git a/include/asm-parisc/compat_signal.h b/arch/parisc/include/asm/compat_signal.h
index 6ad02c360b2..6ad02c360b2 100644
--- a/include/asm-parisc/compat_signal.h
+++ b/arch/parisc/include/asm/compat_signal.h
diff --git a/include/asm-parisc/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h
index 2f7292afde3..2f7292afde3 100644
--- a/include/asm-parisc/compat_ucontext.h
+++ b/arch/parisc/include/asm/compat_ucontext.h
diff --git a/include/asm-parisc/cputime.h b/arch/parisc/include/asm/cputime.h
index dcdf2fbd7e7..dcdf2fbd7e7 100644
--- a/include/asm-parisc/cputime.h
+++ b/arch/parisc/include/asm/cputime.h
diff --git a/include/asm-parisc/current.h b/arch/parisc/include/asm/current.h
index 0fb9338e3bf..0fb9338e3bf 100644
--- a/include/asm-parisc/current.h
+++ b/arch/parisc/include/asm/current.h
diff --git a/include/asm-parisc/delay.h b/arch/parisc/include/asm/delay.h
index 7a75e984674..7a75e984674 100644
--- a/include/asm-parisc/delay.h
+++ b/arch/parisc/include/asm/delay.h
diff --git a/include/asm-parisc/device.h b/arch/parisc/include/asm/device.h
index d8f9872b0e2..d8f9872b0e2 100644
--- a/include/asm-parisc/device.h
+++ b/arch/parisc/include/asm/device.h
diff --git a/include/asm-parisc/div64.h b/arch/parisc/include/asm/div64.h
index 6cd978cefb2..6cd978cefb2 100644
--- a/include/asm-parisc/div64.h
+++ b/arch/parisc/include/asm/div64.h
diff --git a/include/asm-parisc/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 53af696f23d..53af696f23d 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
diff --git a/include/asm-parisc/dma.h b/arch/parisc/include/asm/dma.h
index 31ad0f05af3..31ad0f05af3 100644
--- a/include/asm-parisc/dma.h
+++ b/arch/parisc/include/asm/dma.h
diff --git a/include/asm-parisc/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h
index 201085f83dd..201085f83dd 100644
--- a/include/asm-parisc/eisa_bus.h
+++ b/arch/parisc/include/asm/eisa_bus.h
diff --git a/include/asm-parisc/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h
index 9c9da980402..9c9da980402 100644
--- a/include/asm-parisc/eisa_eeprom.h
+++ b/arch/parisc/include/asm/eisa_eeprom.h
diff --git a/include/asm-parisc/elf.h b/arch/parisc/include/asm/elf.h
index 7fa675799e6..7fa675799e6 100644
--- a/include/asm-parisc/elf.h
+++ b/arch/parisc/include/asm/elf.h
diff --git a/include/asm-parisc/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h
index 108d8c48e42..108d8c48e42 100644
--- a/include/asm-parisc/emergency-restart.h
+++ b/arch/parisc/include/asm/emergency-restart.h
diff --git a/include/asm-parisc/errno.h b/arch/parisc/include/asm/errno.h
index e2f3ddc796b..e2f3ddc796b 100644
--- a/include/asm-parisc/errno.h
+++ b/arch/parisc/include/asm/errno.h
diff --git a/include/asm-parisc/fb.h b/arch/parisc/include/asm/fb.h
index 4d503a023ab..4d503a023ab 100644
--- a/include/asm-parisc/fb.h
+++ b/arch/parisc/include/asm/fb.h
diff --git a/include/asm-parisc/fcntl.h b/arch/parisc/include/asm/fcntl.h
index 1e1c824764e..1e1c824764e 100644
--- a/include/asm-parisc/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
diff --git a/include/asm-parisc/fixmap.h b/arch/parisc/include/asm/fixmap.h
index de3fe3a1822..de3fe3a1822 100644
--- a/include/asm-parisc/fixmap.h
+++ b/arch/parisc/include/asm/fixmap.h
diff --git a/include/asm-parisc/floppy.h b/arch/parisc/include/asm/floppy.h
index 4ca69f558fa..4ca69f558fa 100644
--- a/include/asm-parisc/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
diff --git a/include/asm-parisc/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55e..0c705c3a55e 100644
--- a/include/asm-parisc/futex.h
+++ b/arch/parisc/include/asm/futex.h
diff --git a/include/asm-parisc/grfioctl.h b/arch/parisc/include/asm/grfioctl.h
index 671e06042b4..671e06042b4 100644
--- a/include/asm-parisc/grfioctl.h
+++ b/arch/parisc/include/asm/grfioctl.h
diff --git a/include/asm-parisc/hardirq.h b/arch/parisc/include/asm/hardirq.h
index ce93133d511..ce93133d511 100644
--- a/include/asm-parisc/hardirq.h
+++ b/arch/parisc/include/asm/hardirq.h
diff --git a/include/asm-parisc/hardware.h b/arch/parisc/include/asm/hardware.h
index 4e9626836ba..4e9626836ba 100644
--- a/include/asm-parisc/hardware.h
+++ b/arch/parisc/include/asm/hardware.h
diff --git a/include/asm-parisc/hw_irq.h b/arch/parisc/include/asm/hw_irq.h
index 6707f7df392..6707f7df392 100644
--- a/include/asm-parisc/hw_irq.h
+++ b/arch/parisc/include/asm/hw_irq.h
diff --git a/include/asm-parisc/ide.h b/arch/parisc/include/asm/ide.h
index c246ef75017..81700a2321c 100644
--- a/include/asm-parisc/ide.h
+++ b/arch/parisc/include/asm/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
-#define ide_release_region(from,extent)		release_region((from), (extent))
 /* Generic I/O and MEMIO string operations.  */
 
 #define __ide_insw	insw
diff --git a/include/asm-parisc/io.h b/arch/parisc/include/asm/io.h
index 55ddb184210..55ddb184210 100644
--- a/include/asm-parisc/io.h
+++ b/arch/parisc/include/asm/io.h
diff --git a/include/asm-parisc/ioctl.h b/arch/parisc/include/asm/ioctl.h
index ec8efa02bed..ec8efa02bed 100644
--- a/include/asm-parisc/ioctl.h
+++ b/arch/parisc/include/asm/ioctl.h
diff --git a/include/asm-parisc/ioctls.h b/arch/parisc/include/asm/ioctls.h
index 6747fad07a3..6747fad07a3 100644
--- a/include/asm-parisc/ioctls.h
+++ b/arch/parisc/include/asm/ioctls.h
diff --git a/include/asm-parisc/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h
index bd956c42578..bd956c42578 100644
--- a/include/asm-parisc/ipcbuf.h
+++ b/arch/parisc/include/asm/ipcbuf.h
diff --git a/include/asm-parisc/irq.h b/arch/parisc/include/asm/irq.h
index 399c81981ed..399c81981ed 100644
--- a/include/asm-parisc/irq.h
+++ b/arch/parisc/include/asm/irq.h
diff --git a/include/asm-parisc/irq_regs.h b/arch/parisc/include/asm/irq_regs.h
index 3dd9c0b7027..3dd9c0b7027 100644
--- a/include/asm-parisc/irq_regs.h
+++ b/arch/parisc/include/asm/irq_regs.h
diff --git a/include/asm-parisc/kdebug.h b/arch/parisc/include/asm/kdebug.h
index 6ece1b03766..6ece1b03766 100644
--- a/include/asm-parisc/kdebug.h
+++ b/arch/parisc/include/asm/kdebug.h
diff --git a/include/asm-parisc/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
index 806aae3c533..806aae3c533 100644
--- a/include/asm-parisc/kmap_types.h
+++ b/arch/parisc/include/asm/kmap_types.h
diff --git a/include/asm-parisc/led.h b/arch/parisc/include/asm/led.h
index c3405ab9d60..c3405ab9d60 100644
--- a/include/asm-parisc/led.h
+++ b/arch/parisc/include/asm/led.h
diff --git a/include/asm-parisc/linkage.h b/arch/parisc/include/asm/linkage.h
index 0b19a7242d0..0b19a7242d0 100644
--- a/include/asm-parisc/linkage.h
+++ b/arch/parisc/include/asm/linkage.h
diff --git a/include/asm-parisc/local.h b/arch/parisc/include/asm/local.h
index c11c530f74d..c11c530f74d 100644
--- a/include/asm-parisc/local.h
+++ b/arch/parisc/include/asm/local.h
diff --git a/include/asm-parisc/machdep.h b/arch/parisc/include/asm/machdep.h
index a231c97d703..a231c97d703 100644
--- a/include/asm-parisc/machdep.h
+++ b/arch/parisc/include/asm/machdep.h
diff --git a/include/asm-parisc/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
index adf41631449..adf41631449 100644
--- a/include/asm-parisc/mc146818rtc.h
+++ b/arch/parisc/include/asm/mc146818rtc.h
diff --git a/include/asm-parisc/mckinley.h b/arch/parisc/include/asm/mckinley.h
index d1ea6f12915..d1ea6f12915 100644
--- a/include/asm-parisc/mckinley.h
+++ b/arch/parisc/include/asm/mckinley.h
diff --git a/include/asm-parisc/mman.h b/arch/parisc/include/asm/mman.h
index defe752cc99..defe752cc99 100644
--- a/include/asm-parisc/mman.h
+++ b/arch/parisc/include/asm/mman.h
diff --git a/include/asm-parisc/mmu.h b/arch/parisc/include/asm/mmu.h
index 6a310cf8b73..6a310cf8b73 100644
--- a/include/asm-parisc/mmu.h
+++ b/arch/parisc/include/asm/mmu.h
diff --git a/include/asm-parisc/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 85856c74ad1..85856c74ad1 100644
--- a/include/asm-parisc/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
diff --git a/include/asm-parisc/mmzone.h b/arch/parisc/include/asm/mmzone.h
index 9608d2cf214..9608d2cf214 100644
--- a/include/asm-parisc/mmzone.h
+++ b/arch/parisc/include/asm/mmzone.h
diff --git a/include/asm-parisc/module.h b/arch/parisc/include/asm/module.h
index c2cb49e934c..c2cb49e934c 100644
--- a/include/asm-parisc/module.h
+++ b/arch/parisc/include/asm/module.h
diff --git a/include/asm-parisc/msgbuf.h b/arch/parisc/include/asm/msgbuf.h
index fe88f264941..fe88f264941 100644
--- a/include/asm-parisc/msgbuf.h
+++ b/arch/parisc/include/asm/msgbuf.h
diff --git a/include/asm-parisc/mutex.h b/arch/parisc/include/asm/mutex.h
index 458c1f7fbc1..458c1f7fbc1 100644
--- a/include/asm-parisc/mutex.h
+++ b/arch/parisc/include/asm/mutex.h
diff --git a/include/asm-parisc/page.h b/arch/parisc/include/asm/page.h
index c3941f09a87..c3941f09a87 100644
--- a/include/asm-parisc/page.h
+++ b/arch/parisc/include/asm/page.h
diff --git a/include/asm-parisc/param.h b/arch/parisc/include/asm/param.h
index 32e03d87785..32e03d87785 100644
--- a/include/asm-parisc/param.h
+++ b/arch/parisc/include/asm/param.h
diff --git a/include/asm-parisc/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
index 7aa13f2add7..7aa13f2add7 100644
--- a/include/asm-parisc/parisc-device.h
+++ b/arch/parisc/include/asm/parisc-device.h
diff --git a/include/asm-parisc/parport.h b/arch/parisc/include/asm/parport.h
index 00d9cc3e7b9..00d9cc3e7b9 100644
--- a/include/asm-parisc/parport.h
+++ b/arch/parisc/include/asm/parport.h
diff --git a/include/asm-parisc/pci.h b/arch/parisc/include/asm/pci.h
index 4ba868f44a5..4ba868f44a5 100644
--- a/include/asm-parisc/pci.h
+++ b/arch/parisc/include/asm/pci.h
diff --git a/include/asm-parisc/pdc.h b/arch/parisc/include/asm/pdc.h
index 9eaa794c3e4..c584b00c607 100644
--- a/include/asm-parisc/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -332,6 +332,9 @@
 #define BOOT_CONSOLE_SPA_OFFSET  0x3c4
 #define BOOT_CONSOLE_PATH_OFFSET 0x3a8
 
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT	32
+
 #if !defined(__ASSEMBLY__)
 #ifdef __KERNEL__
 
@@ -600,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
 int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
 int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
 		  void *iodc_data, unsigned int iodc_data_size);
 int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -638,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
 #endif
 
 void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
 int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
 int pdc_do_reset(void);
 int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/include/asm-parisc/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h
index a609273dc6b..a609273dc6b 100644
--- a/include/asm-parisc/pdc_chassis.h
+++ b/arch/parisc/include/asm/pdc_chassis.h
diff --git a/include/asm-parisc/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
index 47539f11795..47539f11795 100644
--- a/include/asm-parisc/pdcpat.h
+++ b/arch/parisc/include/asm/pdcpat.h
diff --git a/include/asm-parisc/percpu.h b/arch/parisc/include/asm/percpu.h
index a0dcd197012..a0dcd197012 100644
--- a/include/asm-parisc/percpu.h
+++ b/arch/parisc/include/asm/percpu.h
diff --git a/include/asm-parisc/perf.h b/arch/parisc/include/asm/perf.h
index a18e11972c0..a18e11972c0 100644
--- a/include/asm-parisc/perf.h
+++ b/arch/parisc/include/asm/perf.h
diff --git a/include/asm-parisc/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index fc987a1c12a..fc987a1c12a 100644
--- a/include/asm-parisc/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
diff --git a/include/asm-parisc/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 470a4b88124..470a4b88124 100644
--- a/include/asm-parisc/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
diff --git a/include/asm-parisc/poll.h b/arch/parisc/include/asm/poll.h
index c98509d3149..c98509d3149 100644
--- a/include/asm-parisc/poll.h
+++ b/arch/parisc/include/asm/poll.h
diff --git a/include/asm-parisc/posix_types.h b/arch/parisc/include/asm/posix_types.h
index bb725a6630b..bb725a6630b 100644
--- a/include/asm-parisc/posix_types.h
+++ b/arch/parisc/include/asm/posix_types.h
diff --git a/include/asm-parisc/prefetch.h b/arch/parisc/include/asm/prefetch.h
index c5edc60c059..c5edc60c059 100644
--- a/include/asm-parisc/prefetch.h
+++ b/arch/parisc/include/asm/prefetch.h
diff --git a/include/asm-parisc/processor.h b/arch/parisc/include/asm/processor.h
index 3c9d34844c8..3c9d34844c8 100644
--- a/include/asm-parisc/processor.h
+++ b/arch/parisc/include/asm/processor.h
diff --git a/include/asm-parisc/psw.h b/arch/parisc/include/asm/psw.h
index 5a3e23c9ce6..5a3e23c9ce6 100644
--- a/include/asm-parisc/psw.h
+++ b/arch/parisc/include/asm/psw.h
diff --git a/include/asm-parisc/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff..afa5333187b 100644
--- a/include/asm-parisc/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,6 +47,16 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step()	1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step()	1
+void user_enable_block_step(struct task_struct *task);
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/real.h b/arch/parisc/include/asm/real.h
index 82acb25db39..82acb25db39 100644
--- a/include/asm-parisc/real.h
+++ b/arch/parisc/include/asm/real.h
diff --git a/include/asm-parisc/resource.h b/arch/parisc/include/asm/resource.h
index 8b06343b62e..8b06343b62e 100644
--- a/include/asm-parisc/resource.h
+++ b/arch/parisc/include/asm/resource.h
diff --git a/include/asm-parisc/ropes.h b/arch/parisc/include/asm/ropes.h
index 007a880615e..09f51d5ab57 100644
--- a/include/asm-parisc/ropes.h
+++ b/arch/parisc/include/asm/ropes.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_PARISC_ROPES_H_
 #define _ASM_PARISC_ROPES_H_
 
-#include <asm-parisc/parisc-device.h>
+#include <asm/parisc-device.h>
 
 #ifdef CONFIG_64BIT
 /* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
diff --git a/include/asm-parisc/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index f0dd3b30f6c..f0dd3b30f6c 100644
--- a/include/asm-parisc/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
diff --git a/include/asm-parisc/rtc.h b/arch/parisc/include/asm/rtc.h
index 099d641a42c..099d641a42c 100644
--- a/include/asm-parisc/rtc.h
+++ b/arch/parisc/include/asm/rtc.h
diff --git a/include/asm-parisc/runway.h b/arch/parisc/include/asm/runway.h
index 5bea02da7e2..5bea02da7e2 100644
--- a/include/asm-parisc/runway.h
+++ b/arch/parisc/include/asm/runway.h
diff --git a/include/asm-parisc/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
index 62269b31ebf..62269b31ebf 100644
--- a/include/asm-parisc/scatterlist.h
+++ b/arch/parisc/include/asm/scatterlist.h
diff --git a/include/asm-parisc/sections.h b/arch/parisc/include/asm/sections.h
index 9d13c3507ad..9d13c3507ad 100644
--- a/include/asm-parisc/sections.h
+++ b/arch/parisc/include/asm/sections.h
diff --git a/include/asm-parisc/segment.h b/arch/parisc/include/asm/segment.h
index 26794ddb652..26794ddb652 100644
--- a/include/asm-parisc/segment.h
+++ b/arch/parisc/include/asm/segment.h
diff --git a/include/asm-parisc/sembuf.h b/arch/parisc/include/asm/sembuf.h
index 1e59ffd3bd1..1e59ffd3bd1 100644
--- a/include/asm-parisc/sembuf.h
+++ b/arch/parisc/include/asm/sembuf.h
diff --git a/include/asm-parisc/serial.h b/arch/parisc/include/asm/serial.h
index d7e3cc60dbc..d7e3cc60dbc 100644
--- a/include/asm-parisc/serial.h
+++ b/arch/parisc/include/asm/serial.h
diff --git a/include/asm-parisc/setup.h b/arch/parisc/include/asm/setup.h
index 7da2e5b8747..7da2e5b8747 100644
--- a/include/asm-parisc/setup.h
+++ b/arch/parisc/include/asm/setup.h
diff --git a/include/asm-parisc/shmbuf.h b/arch/parisc/include/asm/shmbuf.h
index 0a3eada1863..0a3eada1863 100644
--- a/include/asm-parisc/shmbuf.h
+++ b/arch/parisc/include/asm/shmbuf.h
diff --git a/include/asm-parisc/shmparam.h b/arch/parisc/include/asm/shmparam.h
index 628ddc22faa..628ddc22faa 100644
--- a/include/asm-parisc/shmparam.h
+++ b/arch/parisc/include/asm/shmparam.h
diff --git a/include/asm-parisc/sigcontext.h b/arch/parisc/include/asm/sigcontext.h
index 27ef31bb3b6..27ef31bb3b6 100644
--- a/include/asm-parisc/sigcontext.h
+++ b/arch/parisc/include/asm/sigcontext.h
diff --git a/include/asm-parisc/siginfo.h b/arch/parisc/include/asm/siginfo.h
index d7034728f37..d7034728f37 100644
--- a/include/asm-parisc/siginfo.h
+++ b/arch/parisc/include/asm/siginfo.h
diff --git a/include/asm-parisc/signal.h b/arch/parisc/include/asm/signal.h
index c20356375d1..c20356375d1 100644
--- a/include/asm-parisc/signal.h
+++ b/arch/parisc/include/asm/signal.h
diff --git a/include/asm-parisc/smp.h b/arch/parisc/include/asm/smp.h
index 398cdbaf4e5..398cdbaf4e5 100644
--- a/include/asm-parisc/smp.h
+++ b/arch/parisc/include/asm/smp.h
diff --git a/include/asm-parisc/socket.h b/arch/parisc/include/asm/socket.h
index fba402c95ac..fba402c95ac 100644
--- a/include/asm-parisc/socket.h
+++ b/arch/parisc/include/asm/socket.h
diff --git a/include/asm-parisc/sockios.h b/arch/parisc/include/asm/sockios.h
index dabfbc7483f..dabfbc7483f 100644
--- a/include/asm-parisc/sockios.h
+++ b/arch/parisc/include/asm/sockios.h
diff --git a/include/asm-parisc/spinlock.h b/arch/parisc/include/asm/spinlock.h
index f3d2090a18d..f3d2090a18d 100644
--- a/include/asm-parisc/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
diff --git a/include/asm-parisc/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 3f72f47cf4b..3f72f47cf4b 100644
--- a/include/asm-parisc/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
diff --git a/include/asm-parisc/stat.h b/arch/parisc/include/asm/stat.h
index 9d5fbbc5c31..9d5fbbc5c31 100644
--- a/include/asm-parisc/stat.h
+++ b/arch/parisc/include/asm/stat.h
diff --git a/include/asm-parisc/statfs.h b/arch/parisc/include/asm/statfs.h
index 324bea905dc..324bea905dc 100644
--- a/include/asm-parisc/statfs.h
+++ b/arch/parisc/include/asm/statfs.h
diff --git a/include/asm-parisc/string.h b/arch/parisc/include/asm/string.h
index eda01be65e3..eda01be65e3 100644
--- a/include/asm-parisc/string.h
+++ b/arch/parisc/include/asm/string.h
diff --git a/include/asm-parisc/superio.h b/arch/parisc/include/asm/superio.h
index 6598acb4d46..6598acb4d46 100644
--- a/include/asm-parisc/superio.h
+++ b/arch/parisc/include/asm/superio.h
diff --git a/include/asm-parisc/system.h b/arch/parisc/include/asm/system.h
index ee80c920b46..ee80c920b46 100644
--- a/include/asm-parisc/system.h
+++ b/arch/parisc/include/asm/system.h
diff --git a/include/asm-parisc/termbits.h b/arch/parisc/include/asm/termbits.h
index d8bbc73b16b..d8bbc73b16b 100644
--- a/include/asm-parisc/termbits.h
+++ b/arch/parisc/include/asm/termbits.h
diff --git a/include/asm-parisc/termios.h b/arch/parisc/include/asm/termios.h
index a2a57a4548a..a2a57a4548a 100644
--- a/include/asm-parisc/termios.h
+++ b/arch/parisc/include/asm/termios.h
diff --git a/include/asm-parisc/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 9f812741c35..0407959da48 100644
--- a/include/asm-parisc/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -58,6 +58,7 @@ struct thread_info {
 #define TIF_32BIT               4       /* 32 bit binary */
 #define TIF_MEMDIE		5
 #define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
+#define TIF_FREEZE		7	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -65,6 +66,7 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
                                  _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
diff --git a/include/asm-parisc/timex.h b/arch/parisc/include/asm/timex.h
index 3b68d77273d..3b68d77273d 100644
--- a/include/asm-parisc/timex.h
+++ b/arch/parisc/include/asm/timex.h
diff --git a/include/asm-parisc/tlb.h b/arch/parisc/include/asm/tlb.h
index 383b1db310e..383b1db310e 100644
--- a/include/asm-parisc/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
diff --git a/include/asm-parisc/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index b72ec66db69..b72ec66db69 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
diff --git a/include/asm-parisc/topology.h b/arch/parisc/include/asm/topology.h
index d8133eb0b1e..d8133eb0b1e 100644
--- a/include/asm-parisc/topology.h
+++ b/arch/parisc/include/asm/topology.h
diff --git a/include/asm-parisc/traps.h b/arch/parisc/include/asm/traps.h
index 1945f995f2d..1945f995f2d 100644
--- a/include/asm-parisc/traps.h
+++ b/arch/parisc/include/asm/traps.h
diff --git a/include/asm-parisc/types.h b/arch/parisc/include/asm/types.h
index 7f5a39bfb4c..7f5a39bfb4c 100644
--- a/include/asm-parisc/types.h
+++ b/arch/parisc/include/asm/types.h
diff --git a/include/asm-parisc/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 4878b9501f2..4878b9501f2 100644
--- a/include/asm-parisc/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
diff --git a/include/asm-parisc/ucontext.h b/arch/parisc/include/asm/ucontext.h
index 6c8883e4b0b..6c8883e4b0b 100644
--- a/include/asm-parisc/ucontext.h
+++ b/arch/parisc/include/asm/ucontext.h
diff --git a/include/asm-parisc/unaligned.h b/arch/parisc/include/asm/unaligned.h
index dfc5d3321a5..dfc5d3321a5 100644
--- a/include/asm-parisc/unaligned.h
+++ b/arch/parisc/include/asm/unaligned.h
diff --git a/include/asm-parisc/unistd.h b/arch/parisc/include/asm/unistd.h
index a7d857f0e4f..ef26b009dc5 100644
--- a/include/asm-parisc/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -801,8 +801,14 @@
 #define __NR_timerfd_create	(__NR_Linux + 306)
 #define __NR_timerfd_settime	(__NR_Linux + 307)
 #define __NR_timerfd_gettime	(__NR_Linux + 308)
-
-#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
+#define __NR_signalfd4		(__NR_Linux + 309)
+#define __NR_eventfd2		(__NR_Linux + 310)
+#define __NR_epoll_create1	(__NR_Linux + 311)
+#define __NR_dup3		(__NR_Linux + 312)
+#define __NR_pipe2		(__NR_Linux + 313)
+#define __NR_inotify_init1	(__NR_Linux + 314)
+
+#define __NR_Linux_syscalls	(__NR_inotify_init1 + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/include/asm-parisc/unwind.h b/arch/parisc/include/asm/unwind.h
index 2f7e6e50a15..52482e4fc20 100644
--- a/include/asm-parisc/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
+int unwind_init(void);
+
 #endif
diff --git a/include/asm-parisc/user.h b/arch/parisc/include/asm/user.h
index 80224753e50..80224753e50 100644
--- a/include/asm-parisc/user.h
+++ b/arch/parisc/include/asm/user.h
diff --git a/include/asm-parisc/vga.h b/arch/parisc/include/asm/vga.h
index 171399a88ca..171399a88ca 100644
--- a/include/asm-parisc/vga.h
+++ b/arch/parisc/include/asm/vga.h
diff --git a/include/asm-parisc/xor.h b/arch/parisc/include/asm/xor.h
index c82eb12a5b1..c82eb12a5b1 100644
--- a/include/asm-parisc/xor.h
+++ b/arch/parisc/include/asm/xor.h
diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644
index 00000000000..c5f676c3c22
--- /dev/null
+++ b/arch/parisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4f..699cf8ef211 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+	BLANK();
+	DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+	BLANK();
 	return 0;
 }
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7..03f26bd75bd 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
 #include <asm/processor.h>	/* for boot_cpu_data */
 
 static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
 #endif
 }
 
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+		__pa(pdc_result), 0);
+	convert_to_wide(pdc_result);
+	if (pdc_result[0] != NARROW_FIRMWARE)
+		parisc_narrow_firmware = 0;
+}
+	
 /**
  * set_firmware_width - Determine if the firmware is wide or narrow.
  * 
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
  */
 void __init set_firmware_width(void)
 {
-#ifdef CONFIG_64BIT
-	int retval;
 	unsigned long flags;
+	spin_lock_irqsave(&pdc_lock, flags);
+	set_firmware_width_unlocked();
+	spin_unlock_irqrestore(&pdc_lock, flags);
+}
+#else
+void __init set_firmware_width_unlocked(void) {
+	return;
+}
 
-        spin_lock_irqsave(&pdc_lock, flags);
-	retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
-	convert_to_wide(pdc_result);
-	if(pdc_result[0] != NARROW_FIRMWARE)
-		parisc_narrow_firmware = 0;
-        spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+void __init set_firmware_width(void) {
+	return;
 }
+#endif /*CONFIG_64BIT*/
 
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
 	return retval;
 }
 
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	pdc_coproc_info->ccr_functional = pdc_result[0];
+	pdc_coproc_info->ccr_present = pdc_result[1];
+	pdc_coproc_info->revision = pdc_result[17];
+	pdc_coproc_info->model = pdc_result[18];
+
+	return ret;
+}
+
 /**
  * pdc_coproc_cfg - To identify coprocessors attached to the processor.
  * @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
  */
 int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
 {
-        int retval;
+	int ret;
 	unsigned long flags;
 
-        spin_lock_irqsave(&pdc_lock, flags);
-        retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
-        convert_to_wide(pdc_result);
-        pdc_coproc_info->ccr_functional = pdc_result[0];
-        pdc_coproc_info->ccr_present = pdc_result[1];
-        pdc_coproc_info->revision = pdc_result[17];
-        pdc_coproc_info->model = pdc_result[18];
-        spin_unlock_irqrestore(&pdc_lock, flags);
+	spin_lock_irqsave(&pdc_lock, flags);
+	ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+	spin_unlock_irqrestore(&pdc_lock, flags);
 
-        return retval;
+	return ret;
 }
 
 /**
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index a84e31e8287..0e3d9f9b9e3 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -121,7 +121,7 @@ $pgt_fill_loop:
 	copy		%r0,%r2
 
 	/* And the RFI Target address too */
-	load32		start_kernel,%r11
+	load32		start_parisc,%r11
 
 	/* And the initial task pointer */
 	load32		init_thread_union,%r6
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 49c63797078..90904f9dfc5 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
  * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
  * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/kernel.h>
@@ -27,15 +28,149 @@
 /* PSW bits we allow the debugger to modify */
 #define USER_PSW_BITS	(PSW_N | PSW_V | PSW_CB)
 
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 
-#ifdef DEBUG_PTRACE
-#define DBG(x...)	printk(x)
-#else
-#define DBG(x...)
-#endif
+	/* make sure the trap bits are not set */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_BLOCKSTEP;
+	task->ptrace |= PT_SINGLESTEP;
+
+	if (pa_psw(task)->n) {
+		struct siginfo si;
+
+		/* Nullified, just crank over the queue. */
+		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+		pa_psw(task)->n = 0;
+		pa_psw(task)->x = 0;
+		pa_psw(task)->y = 0;
+		pa_psw(task)->z = 0;
+		pa_psw(task)->b = 0;
+		ptrace_disable(task);
+		/* Don't wake up the task, but let the
+		   parent know something happened. */
+		si.si_code = TRAP_TRACE;
+		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+		si.si_signo = SIGTRAP;
+		si.si_errno = 0;
+		force_sig_info(SIGTRAP, &si, task);
+		/* notify_parent(task, SIGCHLD); */
+		return;
+	}
+
+	/* Enable recovery counter traps.  The recovery counter
+	 * itself will be set to zero on a task switch.  If the
+	 * task is suspended on a syscall then the syscall return
+	 * path will overwrite the recovery counter with a suitable
+	 * value such that it traps once back in user space.  We
+	 * disable interrupts in the tasks PSW here also, to avoid
+	 * interrupts while the recovery counter is decrementing.
+	 */
+	pa_psw(task)->r = 1;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_SINGLESTEP;
+	task->ptrace |= PT_BLOCKSTEP;
+
+	/* Enable taken branch trap. */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 1;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	long ret = -EIO;
 
-#ifdef CONFIG_64BIT
+	switch (request) {
+
+	/* Read the word at location addr in the USER area.  For ptraced
+	   processes, the kernel saves all regs on a syscall. */
+	case PTRACE_PEEKUSR:
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+
+	/* Write the word at location addr in the USER area.  This will need
+	   to change when the kernel no longer saves all regs on a syscall.
+	   FIXME.  There is a problem at the moment in that r3-r18 are only
+	   saved if the process is ptraced on syscall entry, and even then
+	   those values are overwritten by actual register values on syscall
+	   exit. */
+	case PTRACE_POKEUSR:
+		/* Some register values written here may be ignored in
+		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+		 * r31/r31+4, and not with the values in pt_regs.
+		 */
+		if (addr == PT_PSW) {
+			/* Allow writing to Nullify, Divide-step-correction,
+			 * and carry/borrow bits.
+			 * BEWARE, if you set N, and then single step, it won't
+			 * stop on the nullified instruction.
+			 */
+			data &= USER_PSW_BITS;
+			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+			task_regs(child)->gr[0] |= data;
+			ret = 0;
+			break;
+		}
+
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+				addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+				(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+				addr == PT_SAR) {
+			*(unsigned long *) ((char *) task_regs(child) + addr) = data;
+			ret = 0;
+		}
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
 
 /* This function is needed to translate 32 bit pt_regs offsets in to
  * 64 bit pt_regs offsets.  For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
 	else
 		return -1;
 }
-#endif
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t addr, compat_ulong_t data)
 {
-	/* make sure the trap bits are not set */
-	pa_psw(child)->r = 0;
-	pa_psw(child)->t = 0;
-	pa_psw(child)->h = 0;
-	pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	long ret;
-#ifdef DEBUG_PTRACE
-	long oaddr=addr, odata=data;
-#endif
+	compat_uint_t tmp;
+	long ret = -EIO;
 
 	switch (request) {
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
-	case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			int copied;
-			unsigned int tmp;
-
-			addr &= 0xffffffffL;
-			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-			ret = -EIO;
-			if (copied != sizeof(tmp))
-				goto out_tsk;
-			ret = put_user(tmp,(unsigned int *) data);
-			DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
-				request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata, ret, tmp);
-		}
-		else
-#endif
-			ret = generic_ptrace_peekdata(child, addr, data);
-		goto out_tsk;
-	}
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = 0;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp = (unsigned int)data;
-			DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
-				request == PTRACE_POKETEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata);
-			addr &= 0xffffffffL;
-			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
-				goto out_tsk;
-		}
-		else
-#endif
-		{
-			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
-				goto out_tsk;
-		}
-		ret = -EIO;
-		goto out_tsk;
-
-	/* Read the word at location addr in the USER area.  For ptraced
-	   processes, the kernel saves all regs on a syscall. */
-	case PTRACE_PEEKUSR: {
-		ret = -EIO;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp;
-
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-
-			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned int *) data);
-			DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
-				pid, oaddr, odata, ret, addr, tmp);
-		}
-		else
-#endif
-		{
-			unsigned long tmp;
+	case PTRACE_PEEKUSR:
+		if (addr & (sizeof(compat_uint_t)-1))
+			break;
+		addr = translate_usr_offset(addr);	/* yields -1 for an untranslatable offset */
+		if (addr >= sizeof(struct pt_regs))	/* addr is unsigned: -1 wraps high, "< 0" never fires */
+			break;
 
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned long *) data);
-		}
-		goto out_tsk;
-	}
+		tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+		break;
 
 	/* Write the word at location addr in the USER area.  This will need
 	   to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	   those values are overwritten by actual register values on syscall
 	   exit. */
 	case PTRACE_POKEUSR:
-		ret = -EIO;
 		/* Some register values written here may be ignored in
 		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
 		 * r31/r31+4, and not with the values in pt_regs.
 		 */
-		 /* PT_PSW=0, so this is valid for 32 bit processes under 64
-		 * bit kernels.
-		 */
 		if (addr == PT_PSW) {
-			/* PT_PSW=0, so this is valid for 32 bit processes
-			 * under 64 bit kernels.
-			 *
-			 * Allow writing to Nullify, Divide-step-correction,
-			 * and carry/borrow bits.
-			 * BEWARE, if you set N, and then single step, it won't
-			 * stop on the nullified instruction.
+			/* Since PT_PSW==0, it is valid for 32 bit processes
+			 * under 64 bit kernels as well.
 			 */
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
-				pid, oaddr, odata);
-			data &= USER_PSW_BITS;
-			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
-			task_regs(child)->gr[0] |= data;
-			ret = 0;
-			goto out_tsk;
-		}
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
-				pid, oaddr, odata, addr);
+			ret = arch_ptrace(child, request, addr, data);
+		} else {
+			if (addr & (sizeof(compat_uint_t)-1))
+				break;
+			addr = translate_usr_offset(addr);	/* yields -1 for an untranslatable offset */
+			if (addr >= sizeof(struct pt_regs))	/* addr is unsigned: -1 wraps high, "< 0" never fires */
+				break;
 			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
 				/* Special case, fp regs are 64 bits anyway */
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u32 *) ((char *) task_regs(child) + addr) = data;	/* one 32-bit half per poke; a 64-bit store would clobber the neighbouring word */
 				ret = 0;
 			}
 			else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
 					addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
 					addr == PT_SAR+4) {
 				/* Zero the top 32 bits */
-				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+				*(__u32 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
-			goto out_tsk;
 		}
-		else
-#endif
-		{
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
-					addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
-					(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
-					addr == PT_SAR) {
-				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
-				ret = 0;
-			}
-			goto out_tsk;
-		}
-
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:
-		ret = -EIO;
-		DBG("sys_ptrace(%s)\n",
-			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-		if (!valid_signal(data))
-			goto out_tsk;
-		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-		if (request == PTRACE_SYSCALL) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		} else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}		
-		child->exit_code = data;
-		goto out_wake_notrap;
-
-	case PTRACE_KILL:
-		/*
-		 * make the child exit.  Best I can do is send it a
-		 * sigkill.  perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		ret = 0;
-		DBG("sys_ptrace(KILL)\n");
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			goto out_tsk;
-		child->exit_code = SIGKILL;
-		goto out_wake_notrap;
-
-	case PTRACE_SINGLEBLOCK:
-		DBG("sys_ptrace(SINGLEBLOCK)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_SINGLESTEP;
-		child->ptrace |= PT_BLOCKSTEP;
-		child->exit_code = data;
-
-		/* Enable taken branch trap. */
-		pa_psw(child)->r = 0;
-		pa_psw(child)->t = 1;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		goto out_wake;
-
-	case PTRACE_SINGLESTEP:
-		DBG("sys_ptrace(SINGLESTEP)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_BLOCKSTEP;
-		child->ptrace |= PT_SINGLESTEP;
-		child->exit_code = data;
-
-		if (pa_psw(child)->n) {
-			struct siginfo si;
-
-			/* Nullified, just crank over the queue. */
-			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
-			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
-			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
-			pa_psw(child)->n = 0;
-			pa_psw(child)->x = 0;
-			pa_psw(child)->y = 0;
-			pa_psw(child)->z = 0;
-			pa_psw(child)->b = 0;
-			ptrace_disable(child);
-			/* Don't wake up the child, but let the
-			   parent know something happened. */
-			si.si_code = TRAP_TRACE;
-			si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
-			si.si_signo = SIGTRAP;
-			si.si_errno = 0;
-			force_sig_info(SIGTRAP, &si, child);
-			//notify_parent(child, SIGCHLD);
-			//ret = 0;
-			goto out_wake;
-		}
-
-		/* Enable recovery counter traps.  The recovery counter
-		 * itself will be set to zero on a task switch.  If the
-		 * task is suspended on a syscall then the syscall return
-		 * path will overwrite the recovery counter with a suitable
-		 * value such that it traps once back in user space.  We
-		 * disable interrupts in the childs PSW here also, to avoid
-		 * interrupts while the recovery counter is decrementing.
-		 */
-		pa_psw(child)->r = 1;
-		pa_psw(child)->t = 0;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		/* give it a chance to run. */
-		goto out_wake;
-
-	case PTRACE_GETEVENTMSG:
-                ret = put_user(child->ptrace_message, (unsigned int __user *) data);
-		goto out_tsk;
+		break;
 
 	default:
-		ret = ptrace_request(child, request, addr, data);
-		goto out_tsk;
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
 	}
 
-out_wake_notrap:
-	ptrace_disable(child);
-out_wake:
-	wake_up_process(child);
-	ret = 0;
-out_tsk:
-	DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
-		request, pid, oaddr, odata, ret);
 	return ret;
 }
+#endif
+
 
 void syscall_trace(void)
 {
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 7a92695d95a..5f3d3a1f903 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -8,12 +8,24 @@
  *
  */
 
+#include <asm/pdc.h>
 #include <asm/psw.h>
 #include <asm/assembly.h>
+#include <asm/asm-offsets.h>
 
 #include <linux/linkage.h>
 
+
 	.section	.bss
+
+	.export pdc_result
+	.export pdc_result2
+	.align 8
+pdc_result:
+	.block	ASM_PDC_RESULT_SIZE
+pdc_result2:
+	.block	ASM_PDC_RESULT_SIZE
+
 	.export real_stack
 	.export real32_stack
 	.export real64_stack
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 39e7c5a5946..7d27853ff8c 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/pdc_chassis.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_64BIT
 	extern int parisc_narrow_firmware;
 #endif
+	unwind_init();
 
 	init_per_cpu(smp_processor_id());	/* Set Modes & Enable FP */
 
@@ -368,6 +370,31 @@ static int __init parisc_init(void)
 
 	return 0;
 }
-
 arch_initcall(parisc_init);
 
+void start_parisc(void)
+{
+	extern void start_kernel(void);
+
+	int ret, cpunum;
+	struct pdc_coproc_cfg coproc_cfg;
+
+	cpunum = smp_processor_id();
+
+	set_firmware_width_unlocked();
+
+	ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+	if (ret >= 0 && coproc_cfg.ccr_functional) {
+		mtctl(coproc_cfg.ccr_functional, 10);
+
+		cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+		cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+		asm volatile ("fstd	%fr0,8(%sp)");
+	} else {
+		panic("must have an fpu to boot linux");
+	}
+
+	start_kernel();
+	// not reached
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index c7e59f54881..303d2b647e4 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -87,7 +87,7 @@
 	ENTRY_SAME(setuid)
 	ENTRY_SAME(getuid)
 	ENTRY_COMP(stime)		/* 25 */
-	ENTRY_SAME(ptrace)
+	ENTRY_COMP(ptrace)
 	ENTRY_SAME(alarm)
 	/* see stat comment */
 	ENTRY_COMP(newfstat)
@@ -407,6 +407,12 @@
 	ENTRY_SAME(timerfd_create)
 	ENTRY_COMP(timerfd_settime)
 	ENTRY_COMP(timerfd_gettime)
+	ENTRY_COMP(signalfd4)
+	ENTRY_SAME(eventfd2)		/* 310 */
+	ENTRY_SAME(epoll_create1)
+	ENTRY_SAME(dup3)
+	ENTRY_SAME(pipe2)
+	ENTRY_SAME(inotify_init1)
 
 	/* Nothing yet */
 
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 24be86bba94..4d09203bc69 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -23,6 +23,7 @@
 #include <linux/smp.h>
 #include <linux/profile.h>
 #include <linux/clocksource.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
 	cpu_data[cpu].it_value = next_tick;
 }
 
+struct platform_device rtc_parisc_dev = {
+	.name = "rtc-parisc",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
+{
+	int ret;
+
+	ret = platform_device_register(&rtc_parisc_dev);
+	if (ret < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
 void __init time_init(void)
 {
 	static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
 		xtime.tv_nsec = 0;
 	}
 }
-
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 701b2d2d888..6773c582e45 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
 }
 
 /* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
 {
 	long start, stop;
 	register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
 
 	return ret;
 }
-
-module_init(unwind_init);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 380baa1780e..9391199d9e7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/powerpc/sysdev/Kconfig"
 source "arch/powerpc/platforms/Kconfig"
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 64e144505f6..5ac51e6efc1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -10,9 +10,13 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#else
+#include <asm/types.h>
+#endif
 #include <asm/asm-compat.h>
 #include <asm/kdump.h>
-#include <asm/types.h>
 
 /*
  * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ae2ea803a0f..9047af7baa6 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -74,6 +74,13 @@ struct pci_controller {
 	unsigned long pci_io_size;
 #endif
 
+	/* Some machines have a special region to forward the ISA
+	 * "memory" cycles such as VGA memory regions. Left to 0
+	 * if unsupported
+	 */
+	resource_size_t	isa_mem_phys;
+	resource_size_t	isa_mem_size;
+
 	struct pci_ops *ops;
 	unsigned int __iomem *cfg_addr;
 	void __iomem *cfg_data;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0e52c7828ea..39d547fde95 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
 /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
 #define HAVE_PCI_MMAP	1
 
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+			   size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+			   size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+
+#define HAVE_PCI_LEGACY	1
+
 #if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
 /*
  * For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
 extern void pcibios_do_bus_setup(struct pci_bus *bus);
 extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
 
+
 #endif	/* __KERNEL__ */
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
index fda98715cd3..5aa22cffdbd 100644
--- a/arch/powerpc/include/asm/ps3av.h
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -678,6 +678,8 @@ struct ps3av_pkt_avb_param {
 	u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE];
 };
 
+/* channel status */
+extern u8 ps3av_mode_cs_info[];
 
 /** command status **/
 #define PS3AV_STATUS_SUCCESS			0x0000	/* success */
@@ -735,6 +737,7 @@ extern int ps3av_get_mode(void);
 extern int ps3av_video_mode2res(u32, u32 *, u32 *);
 extern int ps3av_video_mute(int);
 extern int ps3av_audio_mute(int);
+extern int ps3av_audio_mute_analog(int);
 extern int ps3av_dev_open(void);
 extern int ps3av_dev_close(void);
 extern void ps3av_register_flip_ctl(void (*flip_ctl)(int on, void *data),
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 734e0754fb9..280a90cc989 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
 #define CHECK_FULL_REGS(regs)						      \
 do {									      \
 	if ((regs)->trap & 1)						      \
-		printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \
+		printk(KERN_CRIT "%s: partial register set\n", __func__); \
 } while (0)
 #endif /* __powerpc64__ */
 
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b32ee..97e05637972 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,9 @@
 #define DBG(fmt...)
 #endif
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -66,7 +69,11 @@ void __init setup_kdump_trampoline(void)
 	DBG(" <- setup_kdump_trampoline()\n");
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
 static int __init parse_elfcorehdr(char *p)
 {
 	if (p)
@@ -75,7 +82,6 @@ static int __init parse_elfcorehdr(char *p)
 	return 1;
 }
 __setup("elfcorehdr=", parse_elfcorehdr);
-#endif
 
 static int __init parse_savemaxmem(char *p)
 {
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 01ce8c38bae..3815d84a1ef 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
 		pci_dev_put(pdev);
 	}
 
-	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+	DBG("non-PCI map for %llx, prot: %lx\n",
+	    (unsigned long long)offset, prot);
 
 	return __pgprot(prot);
 }
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	return ret;
 }
 
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	switch(size) {
+	case 1:
+		*((u8 *)val) = in_8(addr);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		*((u16 *)val) = in_le16(addr);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		*((u32 *)val) = in_le32(addr);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	/* WARNING: The generic code is idiotic. It gets passed a pointer
+	 * to what can be a 1, 2 or 4 byte quantity and always reads that
+	 * as a u32, which means that we have to correct the location of
+	 * the data read within those 32 bits for size 1 and 2
+	 */
+	switch(size) {
+	case 1:
+		out_8(addr, val >> 24);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		out_le16(addr, val >> 16);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		out_le32(addr, val);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+			       struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t offset =
+		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+	resource_size_t size = vma->vm_end - vma->vm_start;
+	struct resource *rp;
+
+	pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+		 pci_domain_nr(bus), bus->number,
+		 mmap_state == pci_mmap_mem ? "MEM" : "IO",
+		 (unsigned long long)offset,
+		 (unsigned long long)(offset + size - 1));
+
+	if (mmap_state == pci_mmap_mem) {
+		if ((offset + size) > hose->isa_mem_size)
+			return -ENXIO;
+		offset += hose->isa_mem_phys;
+	} else {
+		unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		unsigned long roffset = offset + io_offset;
+		rp = &hose->io_resource;
+		if (!(rp->flags & IORESOURCE_IO))
+			return -ENXIO;
+		if (roffset < rp->start || (roffset + size) > rp->end)
+			return -ENXIO;
+		offset += hose->io_base_phys;
+	}
+	pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 		cpu_addr = of_translate_address(dev, ranges + 3);
 		size = of_read_number(ranges + pna + 3, 2);
 		ranges += np;
+
+		/* Skip the range if translation failed or the region is
+		 * zero-sized (some FW, such as on power3, feeds us nonsensical
+		 * zero-sized regions, which look like some kind of attempt at
+		 * exposing the VGA memory hole).
+		 */
 		if (cpu_addr == OF_BAD_ADDR || size == 0)
 			continue;
 
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				isa_hole = memno;
 				if (primary || isa_mem_base == 0)
 					isa_mem_base = cpu_addr;
+				hose->isa_mem_phys = cpu_addr;
+				hose->isa_mem_size = size;
 			}
 
 			/* We get the PCI/Mem offset from the first range or
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 98d7bf99533..b9e1a1da6e5 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(zone, start_pfn, nr_pages);
 }
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-	int ret;
-
-	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = start_pfn + (size >> PAGE_SHIFT);
-	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
-	if (ret)
-		goto out;
-	/* Arch-specific calls go here - next patch */
-out:
-	return ret;
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 92d20e993ed..2ece399f286 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
 
 	remove_proc_entry("sputrace", NULL);
 	kfree(sputrace_log);
+	marker_synchronize_unregister();
 }
 
 module_init(sputrace_init);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bc581d8a7cd..70b7645ce74 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -78,6 +78,8 @@ config S390
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Base setup"
 
 comment "Processor type and features"
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ea40a9d690f..de3fad60c68 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -99,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_31BIT		18	/* 32bit process */ 
 #define TIF_MEMDIE		19
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
+#define TIF_FREEZE		21	/* thread is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 1169130a97e..158b0d6d704 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	return rc;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-
-	start_pfn = PFN_DOWN(start);
-	end_pfn = start_pfn + PFN_DOWN(size);
-	return offline_pages(start_pfn, end_pfn, 120 * HZ);
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5131d50f851..cb2c87df70c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -12,6 +12,7 @@ config SUPERH
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_IOREMAP_PROT if MMU
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
@@ -20,6 +21,10 @@ config SUPERH
 
 config SUPERH32
 	def_bool !SUPERH64
+	select HAVE_KPROBES
+	select HAVE_KRETPROBES
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_FTRACE
 
 config SUPERH64
 	def_bool y if CPU_SH5
@@ -54,8 +59,11 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 config GENERIC_IRQ_PROBE
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool n
+
 config GENERIC_CALIBRATE_DELAY
-	def_bool y
+	bool
 
 config GENERIC_IOMAP
 	bool
@@ -66,6 +74,9 @@ config GENERIC_TIME
 config GENERIC_CLOCKEVENTS
 	def_bool n
 
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+
 config GENERIC_LOCKBREAK
 	def_bool y
 	depends on SMP && PREEMPT
@@ -92,6 +103,10 @@ config STACKTRACE_SUPPORT
 config LOCKDEP_SUPPORT
 	def_bool y
 
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+	depends on !SMP
+
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
@@ -106,6 +121,8 @@ config IO_TRAPPED
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System type"
 
 #
@@ -323,6 +340,7 @@ config CPU_SUBTYPE_SHX3
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_SMP
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 
 # SH4AL-DSP Processor Support
 
@@ -490,7 +508,6 @@ config CRASH_DUMP
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
-	default y
 	help
 	  This kernel feature is useful for number crunching applications
 	  that may need to compute untrusted bytecode during their
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 4d2d102e00d..e6d2c8b11ab 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -82,7 +82,7 @@ config DEBUG_STACK_USAGE
 
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
-	depends on DEBUG_KERNEL && (MMU || BROKEN)
+	depends on DEBUG_KERNEL && (MMU || BROKEN) && !PAGE_SIZE_64KB
 	help
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 01d85c74481..1f409bf8180 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -76,8 +76,10 @@ KBUILD_IMAGE		:= $(defaultimage-y)
 # error messages during linking.
 #
 ifdef CONFIG_SUPERH32
+UTS_MACHINE	:= sh
 LDFLAGS_vmlinux	+= -e _stext
 else
+UTS_MACHINE	:= sh64
 LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
 		   --defsym phys_stext_shmedia=phys_stext+1 \
 		   -e phys_stext_shmedia
@@ -123,6 +125,9 @@ core-y	+= $(addprefix arch/sh/boards/, \
 	     $(filter-out ., $(patsubst %,%/,$(machdir-y))))
 endif
 
+# Common machine type headers. Not part of the arch/sh/boards/ hierarchy.
+machdir-y	+= mach-common
+
 # Companion chips
 core-$(CONFIG_HD6446X_SERIES)	+= arch/sh/cchips/hd6446x/
 
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index ae194869fd6..50467f9d0d0 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -128,6 +128,7 @@ config SH_RTS7751R2D
 
 config SH_RSK7203
 	bool "RSK7203"
+	select GENERIC_GPIO
 	depends on CPU_SUBTYPE_SH7203
 
 config SH_SDK7780
@@ -162,6 +163,7 @@ config SH_SH7785LCR_29BIT_PHYSMAPS
 config SH_MIGOR
 	bool "Migo-R"
 	depends on CPU_SUBTYPE_SH7722
+	select GENERIC_GPIO
 	help
 	  Select Migo-R if configuring for the SH7722 Migo-R platform
           by Renesas System Solutions Asia Pte. Ltd.
@@ -169,6 +171,7 @@ config SH_MIGOR
 config SH_AP325RXA
 	bool "AP-325RXA"
 	depends on CPU_SUBTYPE_SH7723
+	select GENERIC_GPIO
 	help
 	  Renesas "AP-325RXA" support.
 	  Compatible with ALGO SYSTEM CO.,LTD. "AP-320A"
@@ -184,6 +187,13 @@ config SH_EDOSK7705
 	bool "EDOSK7705"
 	depends on CPU_SUBTYPE_SH7705
 
+config SH_EDOSK7760
+	bool "EDOSK7760"
+	depends on CPU_SUBTYPE_SH7760
+	help
+	  Select if configuring for a Renesas EDOSK7760
+	  evaluation board.
+
 config SH_SH4202_MICRODEV
 	bool "SH4-202 MicroDev"
 	depends on CPU_SUBTYPE_SH4_202
@@ -228,6 +238,7 @@ config SH_X3PROTO
 config SH_MAGIC_PANEL_R2
 	bool "Magic Panel R2"
 	depends on CPU_SUBTYPE_SH7720
+	select GENERIC_GPIO
 	help
 	  Select Magic Panel R2 if configuring for Magic Panel R2.
 
diff --git a/arch/sh/boards/Makefile b/arch/sh/boards/Makefile
index 463022c7df3..d9efa392372 100644
--- a/arch/sh/boards/Makefile
+++ b/arch/sh/boards/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_SH_MAGIC_PANEL_R2)	+= board-magicpanelr2.o
 obj-$(CONFIG_SH_RSK7203)	+= board-rsk7203.o
 obj-$(CONFIG_SH_SH7785LCR)	+= board-sh7785lcr.o
 obj-$(CONFIG_SH_SHMIN)		+= board-shmin.o
+obj-$(CONFIG_SH_EDOSK7760)	+= board-edosk7760.o
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index fd1612590bf..7c7874e6ac3 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -18,11 +18,13 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/smc911x.h>
+#include <linux/gpio.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 #include <asm/io.h>
 #include <asm/clock.h>
+#include <cpu/sh7723.h>
 
 static struct smc911x_platdata smc911x_info = {
 	.flags = SMC911X_USE_32BIT,
@@ -52,20 +54,33 @@ static struct platform_device smc9118_device = {
 	},
 };
 
+/*
+ * AP320 and AP325RXA have CPLD data in NOR Flash (0xA80000-0xABFFFF).
+ * If this area is erased, the board cannot boot.
+ */
 static struct mtd_partition ap325rxa_nor_flash_partitions[] = {
 	{
-		 .name = "uboot",
-		 .offset = 0,
-		 .size = (1 * 1024 * 1024),
-		 .mask_flags = MTD_WRITEABLE,	/* Read-only */
+		.name = "uboot",
+		.offset = 0,
+		.size = (1 * 1024 * 1024),
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (2 * 1024 * 1024),
+	}, {
+		.name = "free-area0",
+		.offset = MTDPART_OFS_APPEND,
+		.size = ((7 * 1024 * 1024) + (512 * 1024)),
 	}, {
-		 .name = "kernel",
-		 .offset = MTDPART_OFS_APPEND,
-		 .size = (2 * 1024 * 1024),
+		.name = "CPLD-Data",
+		.offset = MTDPART_OFS_APPEND,
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+		.size = (1024 * 128 * 2),
 	}, {
-		 .name = "other",
-		 .offset = MTDPART_OFS_APPEND,
-		 .size = MTDPART_SIZ_FULL,
+		.name = "free-area1",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
 	},
 };
 
@@ -96,17 +111,7 @@ static struct platform_device ap325rxa_nor_flash_device = {
 #define FPGA_LCDREG	0xB4100180
 #define FPGA_BKLREG	0xB4100212
 #define FPGA_LCDREG_VAL	0x0018
-#define PORT_PHCR	0xA405010E
-#define PORT_PLCR	0xA4050114
-#define PORT_PMCR	0xA4050116
-#define PORT_PRCR	0xA405011C
-#define PORT_PSCR	0xA405011E
-#define PORT_PZCR	0xA405014C
-#define PORT_HIZCRA	0xA4050158
 #define PORT_MSELCRB	0xA4050182
-#define PORT_PSDR	0xA405013E
-#define PORT_PZDR	0xA405016C
-#define PORT_PSELD	0xA4050154
 
 static void ap320_wvga_power_on(void *board_data)
 {
@@ -116,8 +121,7 @@ static void ap320_wvga_power_on(void *board_data)
 	ctrl_outw(FPGA_LCDREG_VAL, FPGA_LCDREG);
 
 	/* backlight */
-	ctrl_outw((ctrl_inw(PORT_PSCR) & ~0x00C0) | 0x40, PORT_PSCR);
-	ctrl_outb(ctrl_inb(PORT_PSDR) & ~0x08, PORT_PSDR);
+	gpio_set_value(GPIO_PTS3, 0);
 	ctrl_outw(0x100, FPGA_BKLREG);
 }
 
@@ -281,12 +285,84 @@ static struct platform_device *ap325rxa_devices[] __initdata = {
 };
 
 static struct i2c_board_info __initdata ap325rxa_i2c_devices[] = {
+	{
+		I2C_BOARD_INFO("pcf8563", 0x51),
+	},
 };
 
 static int __init ap325rxa_devices_setup(void)
 {
-	clk_always_enable("mstp200"); /* LCDC */
-	clk_always_enable("mstp203"); /* CEU */
+	/* LD3 and LD4 LEDs */
+	gpio_request(GPIO_PTX5, NULL); /* RUN */
+	gpio_direction_output(GPIO_PTX5, 1);
+	gpio_export(GPIO_PTX5, 0);
+
+	gpio_request(GPIO_PTX4, NULL); /* INDICATOR */
+	gpio_direction_output(GPIO_PTX4, 0);
+	gpio_export(GPIO_PTX4, 0);
+
+	/* SW1 input */
+	gpio_request(GPIO_PTF7, NULL); /* MODE */
+	gpio_direction_input(GPIO_PTF7);
+	gpio_export(GPIO_PTF7, 0);
+
+	/* LCDC */
+	clk_always_enable("mstp200");
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD9, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDD0, NULL);
+	gpio_request(GPIO_FN_LCDLCLK_PTR, NULL);
+	gpio_request(GPIO_FN_LCDDCK, NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDVSYN, NULL);
+	gpio_request(GPIO_FN_LCDHSYN, NULL);
+	gpio_request(GPIO_FN_LCDDISP, NULL);
+	gpio_request(GPIO_FN_LCDDON, NULL);
+
+	/* LCD backlight */
+	gpio_request(GPIO_PTS3, NULL);
+	gpio_direction_output(GPIO_PTS3, 1);
+
+	/* CEU */
+	clk_always_enable("mstp203");
+	gpio_request(GPIO_FN_VIO_CLK2, NULL);
+	gpio_request(GPIO_FN_VIO_VD2, NULL);
+	gpio_request(GPIO_FN_VIO_HD2, NULL);
+	gpio_request(GPIO_FN_VIO_FLD, NULL);
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO_D15, NULL);
+	gpio_request(GPIO_FN_VIO_D14, NULL);
+	gpio_request(GPIO_FN_VIO_D13, NULL);
+	gpio_request(GPIO_FN_VIO_D12, NULL);
+	gpio_request(GPIO_FN_VIO_D11, NULL);
+	gpio_request(GPIO_FN_VIO_D10, NULL);
+	gpio_request(GPIO_FN_VIO_D9, NULL);
+	gpio_request(GPIO_FN_VIO_D8, NULL);
+
+	gpio_request(GPIO_PTZ7, NULL);
+	gpio_direction_output(GPIO_PTZ7, 0); /* OE_CAM */
+	gpio_request(GPIO_PTZ6, NULL);
+	gpio_direction_output(GPIO_PTZ6, 0); /* STBY_CAM */
+	gpio_request(GPIO_PTZ5, NULL);
+	gpio_direction_output(GPIO_PTZ5, 1); /* RST_CAM */
+	gpio_request(GPIO_PTZ4, NULL);
+	gpio_direction_output(GPIO_PTZ4, 0); /* SADDR */
+
+	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0001, PORT_MSELCRB);
 
 	platform_resource_setup_memory(&ceu_device, "ceu", 4 << 20);
 
@@ -300,18 +376,6 @@ device_initcall(ap325rxa_devices_setup);
 
 static void __init ap325rxa_setup(char **cmdline_p)
 {
-	/* LCDC configuration */
-	ctrl_outw(ctrl_inw(PORT_PHCR) & ~0xffff, PORT_PHCR);
-	ctrl_outw(ctrl_inw(PORT_PLCR) & ~0xffff, PORT_PLCR);
-	ctrl_outw(ctrl_inw(PORT_PMCR) & ~0xffff, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x03ff, PORT_PRCR);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01C0, PORT_HIZCRA);
-
-	/* CEU */
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0001, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_PSELD) & ~0x0003, PORT_PSELD);
-	ctrl_outw((ctrl_inw(PORT_PZCR) & ~0xff00) | 0x5500, PORT_PZCR);
-	ctrl_outb((ctrl_inb(PORT_PZDR) & ~0xf0) | 0x20, PORT_PZDR);
 }
 
 static struct sh_machine_vector mv_ap325rxa __initmv = {
diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c
new file mode 100644
index 00000000000..35dc0994875
--- /dev/null
+++ b/arch/sh/boards/board-edosk7760.c
@@ -0,0 +1,193 @@
+/*
+ * Renesas Europe EDOSK7760 Board Support
+ *
+ * Copyright (C) 2008 SPES Societa' Progettazione Elettronica e Software Ltd.
+ * Author: Luca Santini <luca.santini@spesonline.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/smc91x.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/mtd/physmap.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
+#include <asm/addrspace.h>
+#include <asm/delay.h>
+#include <asm/i2c-sh7760.h>
+#include <asm/sizes.h>
+
+/* Bus state controller registers for CS4 area */
+#define BSC_CS4BCR	0xA4FD0010
+#define BSC_CS4WCR	0xA4FD0030
+
+#define SMC_IOBASE	0xA2000000
+#define SMC_IO_OFFSET	0x300
+#define SMC_IOADDR	(SMC_IOBASE + SMC_IO_OFFSET)
+
+#define ETHERNET_IRQ	5
+
+/* NOR flash */
+static struct mtd_partition edosk7760_nor_flash_partitions[] = {
+	{
+		.name = "bootloader",
+		.offset = 0,
+		.size = SZ_256K,
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = SZ_2M,
+	}, {
+		.name = "fs",
+		.offset = MTDPART_OFS_APPEND,
+		.size = SZ_26M,
+	}, {
+		.name = "other",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
+	},
+};
+
+static struct physmap_flash_data edosk7760_nor_flash_data = {
+	.width		= 4,
+	.parts		= edosk7760_nor_flash_partitions,
+	.nr_parts	= ARRAY_SIZE(edosk7760_nor_flash_partitions),
+};
+
+static struct resource edosk7760_nor_flash_resources[] = {
+	[0] = {
+		.name	= "NOR Flash",
+		.start	= 0x00000000,
+		.end	= 0x00000000 + SZ_32M - 1,
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device edosk7760_nor_flash_device = {
+	.name		= "physmap-flash",
+	.resource	= edosk7760_nor_flash_resources,
+	.num_resources	= ARRAY_SIZE(edosk7760_nor_flash_resources),
+	.dev		= {
+		.platform_data = &edosk7760_nor_flash_data,
+	},
+};
+
+/* i2c initialization functions */
+static struct sh7760_i2c_platdata i2c_pd = {
+	.speed_khz	= 400,
+};
+
+static struct resource sh7760_i2c1_res[] = {
+	{
+		.start	= SH7760_I2C1_MMIO,
+		.end	= SH7760_I2C1_MMIOEND,
+		.flags	= IORESOURCE_MEM,
+	},{
+		.start	= SH7760_I2C1_IRQ,
+		.end	= SH7760_I2C1_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sh7760_i2c1_dev = {
+	.dev    = {
+		.platform_data	= &i2c_pd,
+	},
+
+	.name		= SH7760_I2C_DEVNAME,
+	.id		= 1,
+	.resource	= sh7760_i2c1_res,
+	.num_resources	= ARRAY_SIZE(sh7760_i2c1_res),
+};
+
+static struct resource sh7760_i2c0_res[] = {
+	{
+		.start	= SH7760_I2C0_MMIO,
+		.end	= SH7760_I2C0_MMIOEND,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= SH7760_I2C0_IRQ,
+		.end	= SH7760_I2C0_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sh7760_i2c0_dev = {
+	.dev    = {
+		.platform_data	= &i2c_pd,
+	},
+	.name		= SH7760_I2C_DEVNAME,
+	.id		= 0,
+	.resource	= sh7760_i2c0_res,
+	.num_resources	= ARRAY_SIZE(sh7760_i2c0_res),
+};
+
+/* eth initialization functions */
+static struct smc91x_platdata smc91x_info = {
+	.flags = SMC91X_USE_16BIT | SMC91X_IO_SHIFT_1 | IORESOURCE_IRQ_LOWLEVEL,
+};
+
+static struct resource smc91x_res[] = {
+	[0] = {
+		.start	= SMC_IOADDR,
+		.end	= SMC_IOADDR + SZ_32 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= ETHERNET_IRQ,
+		.end	= ETHERNET_IRQ,
+		.flags	= IORESOURCE_IRQ ,
+	}
+};
+
+static struct platform_device smc91x_dev = {
+	.name		= "smc91x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smc91x_res),
+	.resource	= smc91x_res,
+
+	.dev	= {
+		.platform_data	= &smc91x_info,
+	},
+};
+
+/* platform init code */
+static struct platform_device *edosk7760_devices[] __initdata = {
+	&smc91x_dev,
+	&edosk7760_nor_flash_device,
+	&sh7760_i2c0_dev,
+	&sh7760_i2c1_dev,
+};
+
+static int __init init_edosk7760_devices(void)
+{
+	plat_irq_setup_pins(IRQ_MODE_IRQ);
+
+	return platform_add_devices(edosk7760_devices,
+				    ARRAY_SIZE(edosk7760_devices));
+}
+__initcall(init_edosk7760_devices);
+
+/*
+ * The Machine Vector
+ */
+struct sh_machine_vector mv_edosk7760 __initmv = {
+	.mv_name	= "EDOSK7760",
+	.mv_nr_irqs	= 128,
+};
diff --git a/arch/sh/boards/board-magicpanelr2.c b/arch/sh/boards/board-magicpanelr2.c
index f3b8b07ea5d..3de22ccdeb7 100644
--- a/arch/sh/boards/board-magicpanelr2.c
+++ b/arch/sh/boards/board-magicpanelr2.c
@@ -13,12 +13,14 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/map.h>
-#include <asm/magicpanelr2.h>
+#include <mach/magicpanelr2.h>
 #include <asm/heartbeat.h>
+#include <cpu/sh7720.h>
 
 #define LAN9115_READY	(ctrl_inl(0xA8000084UL) & 0x00000001UL)
 
@@ -170,7 +172,14 @@ static void __init setup_port_multiplexing(void)
 	/* R7 A25;	     R6 A24;	     R5 A23;		  R4 A22;
 	 * R3 A21;	     R2 A20;	     R1 A19;		  R0 A0;
 	 */
-	ctrl_outw(0x0000, PORT_PRCR);	/* 00 00 00 00 00 00 00 00 */
+	gpio_request(GPIO_FN_A25, NULL);
+	gpio_request(GPIO_FN_A24, NULL);
+	gpio_request(GPIO_FN_A23, NULL);
+	gpio_request(GPIO_FN_A22, NULL);
+	gpio_request(GPIO_FN_A21, NULL);
+	gpio_request(GPIO_FN_A20, NULL);
+	gpio_request(GPIO_FN_A19, NULL);
+	gpio_request(GPIO_FN_A0, NULL);
 
 	/* S7 (x);		S6 (x);        S5 (x);	     S4 GPO(EEPROM_CS2);
 	 * S3 GPO(EEPROM_CS1);  S2 SIOF0_TXD;  S1 SIOF0_RXD; S0 SIOF0_SCK;
diff --git a/arch/sh/boards/board-rsk7203.c b/arch/sh/boards/board-rsk7203.c
index ffbedc59a97..ded799cf3ea 100644
--- a/arch/sh/boards/board-rsk7203.c
+++ b/arch/sh/boards/board-rsk7203.c
@@ -16,8 +16,10 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/map.h>
 #include <linux/smc911x.h>
+#include <linux/gpio.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
+#include <asm/sh7203.h>
 
 static struct smc911x_platdata smc911x_info = {
 	.flags		= SMC911X_USE_16BIT,
@@ -122,6 +124,15 @@ static struct platform_device *rsk7203_devices[] __initdata = {
 
 static int __init rsk7203_devices_setup(void)
 {
+	/* Select pins for SCIF0 */
+	gpio_request(GPIO_FN_TXD0, NULL);
+	gpio_request(GPIO_FN_RXD0, NULL);
+
+	/* Light LED0 */
+	gpio_request(GPIO_PE10, NULL);
+	gpio_direction_output(GPIO_PE10, 0);
+	gpio_export(GPIO_PE10, 0);
+
 	set_mtd_partitions();
 	return platform_add_devices(rsk7203_devices,
 				    ARRAY_SIZE(rsk7203_devices));
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index b95d674ee70..408bbddaf32 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -19,7 +19,7 @@
 #include <linux/i2c-pca-platform.h>
 #include <linux/i2c-algo-pca.h>
 #include <asm/heartbeat.h>
-#include <asm/sh7785lcr.h>
+#include <mach/sh7785lcr.h>
 
 /*
  * NOTE: This board has 2 physical memory maps.
diff --git a/arch/sh/boards/board-shmin.c b/arch/sh/boards/board-shmin.c
index 16e5dae8ecf..5cc0867de5a 100644
--- a/arch/sh/boards/board-shmin.c
+++ b/arch/sh/boards/board-shmin.c
@@ -8,7 +8,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <asm/machvec.h>
-#include <asm/shmin.h>
+#include <mach/shmin.h>
 #include <asm/clock.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/boards/mach-edosk7705/io.c b/arch/sh/boards/mach-edosk7705/io.c
index 541cea2a652..7d153e50a01 100644
--- a/arch/sh/boards/mach-edosk7705/io.c
+++ b/arch/sh/boards/mach-edosk7705/io.c
@@ -11,7 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <asm/io.h>
-#include <asm/edosk7705/io.h>
+#include <mach/edosk7705.h>
 #include <asm/addrspace.h>
 
 #define SMC_IOADDR	0xA2000000
diff --git a/arch/sh/boards/mach-edosk7705/setup.c b/arch/sh/boards/mach-edosk7705/setup.c
index f076c45308d..ab3f47bffdf 100644
--- a/arch/sh/boards/mach-edosk7705/setup.c
+++ b/arch/sh/boards/mach-edosk7705/setup.c
@@ -10,7 +10,7 @@
  */
 #include <linux/init.h>
 #include <asm/machvec.h>
-#include <asm/edosk7705/io.h>
+#include <mach/edosk7705.h>
 
 static void __init sh_edosk7705_init_irq(void)
 {
diff --git a/arch/sh/boards/mach-highlander/irq-r7780mp.c b/arch/sh/boards/mach-highlander/irq-r7780mp.c
index ae1cfcb2970..83c28bcd4d2 100644
--- a/arch/sh/boards/mach-highlander/irq-r7780mp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7780mp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/irq-r7780rp.c b/arch/sh/boards/mach-highlander/irq-r7780rp.c
index 9d3921fe27c..b721e86b5af 100644
--- a/arch/sh/boards/mach-highlander/irq-r7780rp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7780rp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/irq-r7785rp.c b/arch/sh/boards/mach-highlander/irq-r7785rp.c
index 896c045aa39..3811b060a39 100644
--- a/arch/sh/boards/mach-highlander/irq-r7785rp.c
+++ b/arch/sh/boards/mach-highlander/irq-r7785rp.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-highlander/psw.c b/arch/sh/boards/mach-highlander/psw.c
index be8d5477fc6..37b1a2ee71a 100644
--- a/arch/sh/boards/mach-highlander/psw.c
+++ b/arch/sh/boards/mach-highlander/psw.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/push-switch.h>
 
 static irqreturn_t psw_irq_handler(int irq, void *arg)
diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c
index bc79afb6fc4..c5a40f7906d 100644
--- a/arch/sh/boards/mach-highlander/setup.c
+++ b/arch/sh/boards/mach-highlander/setup.c
@@ -20,7 +20,7 @@
 #include <linux/i2c.h>
 #include <net/ax88796.h>
 #include <asm/machvec.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/clock.h>
 #include <asm/heartbeat.h>
 #include <asm/io.h>
diff --git a/arch/sh/boards/mach-hp6xx/hp6xx_apm.c b/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
index 177f4f028e0..e85212faf40 100644
--- a/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
+++ b/arch/sh/boards/mach-hp6xx/hp6xx_apm.c
@@ -14,7 +14,7 @@
 #include <linux/apm-emulation.h>
 #include <linux/io.h>
 #include <asm/adc.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 /* percentage values */
 #define APM_CRITICAL			10
diff --git a/arch/sh/boards/mach-hp6xx/pm.c b/arch/sh/boards/mach-hp6xx/pm.c
index e96684def78..64af1f2eef0 100644
--- a/arch/sh/boards/mach-hp6xx/pm.c
+++ b/arch/sh/boards/mach-hp6xx/pm.c
@@ -12,7 +12,7 @@
 #include <linux/time.h>
 #include <asm/io.h>
 #include <asm/hd64461.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <cpu/dac.h>
 #include <asm/pm.h>
 
diff --git a/arch/sh/boards/mach-hp6xx/setup.c b/arch/sh/boards/mach-hp6xx/setup.c
index 475b46caec1..48fece78ff5 100644
--- a/arch/sh/boards/mach-hp6xx/setup.c
+++ b/arch/sh/boards/mach-hp6xx/setup.c
@@ -15,7 +15,7 @@
 #include <asm/hd64461.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <cpu/dac.h>
 
 #define	SCPCR	0xa4000116
diff --git a/arch/sh/boards/mach-lboxre2/irq.c b/arch/sh/boards/mach-lboxre2/irq.c
index 5a1c3bbe7b5..8aa171ab833 100644
--- a/arch/sh/boards/mach-lboxre2/irq.c
+++ b/arch/sh/boards/mach-lboxre2/irq.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 
 /*
  * Initialize IRQ setting
diff --git a/arch/sh/boards/mach-lboxre2/setup.c b/arch/sh/boards/mach-lboxre2/setup.c
index c74440d38ee..2b0b5818e1e 100644
--- a/arch/sh/boards/mach-lboxre2/setup.c
+++ b/arch/sh/boards/mach-lboxre2/setup.c
@@ -16,7 +16,7 @@
 #include <linux/ata_platform.h>
 #include <asm/machvec.h>
 #include <asm/addrspace.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 #include <asm/io.h>
 
 static struct resource cf_ide_resources[] = {
diff --git a/arch/sh/boards/mach-microdev/io.c b/arch/sh/boards/mach-microdev/io.c
index 9f8a540f7e1..52dd748211c 100644
--- a/arch/sh/boards/mach-microdev/io.c
+++ b/arch/sh/boards/mach-microdev/io.c
@@ -15,7 +15,7 @@
 #include <linux/pci.h>
 #include <linux/wait.h>
 #include <asm/io.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 
 	/*
 	 *	we need to have a 'safe' address to re-direct all I/O requests
diff --git a/arch/sh/boards/mach-microdev/irq.c b/arch/sh/boards/mach-microdev/irq.c
index 4d335077a3f..702753cbd28 100644
--- a/arch/sh/boards/mach-microdev/irq.c
+++ b/arch/sh/boards/mach-microdev/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 
 #define NUM_EXTERNAL_IRQS 16	/* IRL0 .. IRL15 */
 
diff --git a/arch/sh/boards/mach-microdev/setup.c b/arch/sh/boards/mach-microdev/setup.c
index fc8cd06d66c..a9202fe3cb5 100644
--- a/arch/sh/boards/mach-microdev/setup.c
+++ b/arch/sh/boards/mach-microdev/setup.c
@@ -14,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <video/s1d13xxxfb.h>
-#include <asm/microdev.h>
+#include <mach/microdev.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
 
diff --git a/arch/sh/boards/mach-migor/lcd_qvga.c b/arch/sh/boards/mach-migor/lcd_qvga.c
index 6e960959644..de9014a8a93 100644
--- a/arch/sh/boards/mach-migor/lcd_qvga.c
+++ b/arch/sh/boards/mach-migor/lcd_qvga.c
@@ -17,8 +17,10 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <asm/sh_mobile_lcdc.h>
-#include <asm/migor.h>
+#include <linux/gpio.h>
+#include <video/sh_mobile_lcdc.h>
+#include <cpu/sh7722.h>
+#include <mach/migor.h>
 
 /* LCD Module is a PH240320T according to board schematics. This module
  * is made up of a 240x320 LCD hooked up to a R61505U (or HX8347-A01?)
@@ -30,9 +32,9 @@
 
 static void reset_lcd_module(void)
 {
-	ctrl_outb(ctrl_inb(PORT_PHDR) & ~0x04, PORT_PHDR);
+	gpio_set_value(GPIO_PTH2, 0);
 	mdelay(2);
-	ctrl_outb(ctrl_inb(PORT_PHDR) | 0x04, PORT_PHDR);
+	gpio_set_value(GPIO_PTH2, 1);
 	mdelay(1);
 }
 
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 714dce91cc9..769d6304342 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -17,14 +17,16 @@
 #include <linux/smc91x.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
+#include <linux/gpio.h>
 #include <media/soc_camera_platform.h>
 #include <media/sh_mobile_ceu.h>
+#include <video/sh_mobile_lcdc.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
 #include <asm/sh_keysc.h>
-#include <asm/sh_mobile_lcdc.h>
-#include <asm/migor.h>
+#include <mach/migor.h>
+#include <cpu/sh7722.h>
 
 /* Address     IRQ  Size  Bus  Description
  * 0x00000000       64MB  16   NOR Flash (SP29PL256N)
@@ -35,7 +37,7 @@
  */
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_16BIT,
+	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
 };
 
 static struct resource smc91x_eth_resources[] = {
@@ -169,7 +171,7 @@ static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd,
 
 static int migor_nand_flash_ready(struct mtd_info *mtd)
 {
-	return ctrl_inb(PORT_PADR) & 0x02; /* PTA1 */
+	return gpio_get_value(GPIO_PTA1); /* NAND_RBn */
 }
 
 struct platform_nand_data migor_nand_flash_data = {
@@ -286,22 +288,15 @@ static struct clk *camera_clk;
 
 static void camera_power_on(void)
 {
-	unsigned char value;
-
 	camera_clk = clk_get(NULL, "video_clk");
 	clk_set_rate(camera_clk, 24000000);
 	clk_enable(camera_clk);	/* start VIO_CKO */
 
+	/* use VIO_RST to take camera out of reset */
 	mdelay(10);
-	value = ctrl_inb(PORT_PTDR);
-	value &= ~0x09;
-#ifndef CONFIG_SH_MIGOR_RTA_WVGA
-	value |= 0x01;
-#endif
-	ctrl_outb(value, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 0);
 	mdelay(10);
-
-	ctrl_outb(value | 8, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 1);
 }
 
 static void camera_power_off(void)
@@ -309,7 +304,7 @@ static void camera_power_off(void)
 	clk_disable(camera_clk); /* stop VIO_CKO */
 	clk_put(camera_clk);
 
-	ctrl_outb(ctrl_inb(PORT_PTDR) & ~0x08, PORT_PTDR);
+	gpio_set_value(GPIO_PTT3, 0);
 }
 
 #ifdef CONFIG_I2C
@@ -458,75 +453,135 @@ static struct i2c_board_info migor_i2c_devices[] = {
 
 static int __init migor_devices_setup(void)
 {
+	/* Light D11 LED */
+	gpio_request(GPIO_PTJ7, NULL);
+	gpio_direction_output(GPIO_PTJ7, 1);
+	gpio_export(GPIO_PTJ7, 0);
+
+	/* Light D12 LED */
+	gpio_request(GPIO_PTJ5, NULL);
+	gpio_direction_output(GPIO_PTJ5, 1);
+	gpio_export(GPIO_PTJ5, 0);
+
+	/* SMC91C111 - Enable IRQ0, Setup CS4 for 16-bit fast access */
+	gpio_request(GPIO_FN_IRQ0, NULL);
+	ctrl_outl(0x00003400, BSC_CS4BCR);
+	ctrl_outl(0x00110080, BSC_CS4WCR);
+
+	/* KEYSC */
 	clk_always_enable("mstp214"); /* KEYSC */
+	gpio_request(GPIO_FN_KEYOUT0, NULL);
+	gpio_request(GPIO_FN_KEYOUT1, NULL);
+	gpio_request(GPIO_FN_KEYOUT2, NULL);
+	gpio_request(GPIO_FN_KEYOUT3, NULL);
+	gpio_request(GPIO_FN_KEYOUT4_IN6, NULL);
+	gpio_request(GPIO_FN_KEYIN1, NULL);
+	gpio_request(GPIO_FN_KEYIN2, NULL);
+	gpio_request(GPIO_FN_KEYIN3, NULL);
+	gpio_request(GPIO_FN_KEYIN4, NULL);
+	gpio_request(GPIO_FN_KEYOUT5_IN5, NULL);
+
+	/* NAND Flash */
+	gpio_request(GPIO_FN_CS6A_CE2B, NULL);
+	ctrl_outl((ctrl_inl(BSC_CS6ABCR) & ~0x0600) | 0x0200, BSC_CS6ABCR);
+	gpio_request(GPIO_PTA1, NULL);
+	gpio_direction_input(GPIO_PTA1);
+
+	/* Touch Panel */
+	gpio_request(GPIO_FN_IRQ6, NULL);
+
+	/* LCD Panel */
 	clk_always_enable("mstp200"); /* LCDC */
+#ifdef CONFIG_SH_MIGOR_QVGA /* LCDC - QVGA - Enable SYS Interface signals */
+	gpio_request(GPIO_FN_LCDD17, NULL);
+	gpio_request(GPIO_FN_LCDD16, NULL);
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDRS, NULL);
+	gpio_request(GPIO_FN_LCDCS, NULL);
+	gpio_request(GPIO_FN_LCDRD, NULL);
+	gpio_request(GPIO_FN_LCDWR, NULL);
+	gpio_request(GPIO_PTH2, NULL); /* LCD_DON */
+	gpio_direction_output(GPIO_PTH2, 1);
+#endif
+#ifdef CONFIG_SH_MIGOR_RTA_WVGA /* LCDC - WVGA - Enable RGB Interface signals */
+	gpio_request(GPIO_FN_LCDD15, NULL);
+	gpio_request(GPIO_FN_LCDD14, NULL);
+	gpio_request(GPIO_FN_LCDD13, NULL);
+	gpio_request(GPIO_FN_LCDD12, NULL);
+	gpio_request(GPIO_FN_LCDD11, NULL);
+	gpio_request(GPIO_FN_LCDD10, NULL);
+	gpio_request(GPIO_FN_LCDD9, NULL);
+	gpio_request(GPIO_FN_LCDD8, NULL);
+	gpio_request(GPIO_FN_LCDD7, NULL);
+	gpio_request(GPIO_FN_LCDD6, NULL);
+	gpio_request(GPIO_FN_LCDD5, NULL);
+	gpio_request(GPIO_FN_LCDD4, NULL);
+	gpio_request(GPIO_FN_LCDD3, NULL);
+	gpio_request(GPIO_FN_LCDD2, NULL);
+	gpio_request(GPIO_FN_LCDD1, NULL);
+	gpio_request(GPIO_FN_LCDD0, NULL);
+	gpio_request(GPIO_FN_LCDLCLK, NULL);
+	gpio_request(GPIO_FN_LCDDCK, NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDVSYN, NULL);
+	gpio_request(GPIO_FN_LCDHSYN, NULL);
+	gpio_request(GPIO_FN_LCDDISP, NULL);
+	gpio_request(GPIO_FN_LCDDON, NULL);
+#endif
+
+	/* CEU */
 	clk_always_enable("mstp203"); /* CEU */
+	gpio_request(GPIO_FN_VIO_CLK2, NULL);
+	gpio_request(GPIO_FN_VIO_VD2, NULL);
+	gpio_request(GPIO_FN_VIO_HD2, NULL);
+	gpio_request(GPIO_FN_VIO_FLD, NULL);
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO_D15, NULL);
+	gpio_request(GPIO_FN_VIO_D14, NULL);
+	gpio_request(GPIO_FN_VIO_D13, NULL);
+	gpio_request(GPIO_FN_VIO_D12, NULL);
+	gpio_request(GPIO_FN_VIO_D11, NULL);
+	gpio_request(GPIO_FN_VIO_D10, NULL);
+	gpio_request(GPIO_FN_VIO_D9, NULL);
+	gpio_request(GPIO_FN_VIO_D8, NULL);
+
+	gpio_request(GPIO_PTT3, NULL); /* VIO_RST */
+	gpio_direction_output(GPIO_PTT3, 0);
+	gpio_request(GPIO_PTT2, NULL); /* TV_IN_EN */
+	gpio_direction_output(GPIO_PTT2, 1);
+	gpio_request(GPIO_PTT0, NULL); /* CAM_EN */
+#ifdef CONFIG_SH_MIGOR_RTA_WVGA
+	gpio_direction_output(GPIO_PTT0, 0);
+#else
+	gpio_direction_output(GPIO_PTT0, 1);
+#endif
+	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x2000, PORT_MSELCRB); /* D15->D8 */
 
 	platform_resource_setup_memory(&migor_ceu_device, "ceu", 4 << 20);
 
 	i2c_register_board_info(0, migor_i2c_devices,
 				ARRAY_SIZE(migor_i2c_devices));
- 
+
 	return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
 __initcall(migor_devices_setup);
 
 static void __init migor_setup(char **cmdline_p)
 {
-	/* SMC91C111 - Enable IRQ0 */
-	ctrl_outw(ctrl_inw(PORT_PJCR) & ~0x0003, PORT_PJCR);
-
-	/* KEYSC */
-	ctrl_outw(ctrl_inw(PORT_PYCR) & ~0x0fff, PORT_PYCR);
-	ctrl_outw(ctrl_inw(PORT_PZCR) & ~0x0ff0, PORT_PZCR);
-	ctrl_outw(ctrl_inw(PORT_PSELA) & ~0x4100, PORT_PSELA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x4000, PORT_HIZCRA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRC) & ~0xc000, PORT_HIZCRC);
-
-	/* NAND Flash */
-	ctrl_outw(ctrl_inw(PORT_PXCR) & 0x0fff, PORT_PXCR);
-	ctrl_outl((ctrl_inl(BSC_CS6ABCR) & ~0x00000600) | 0x00000200,
-		  BSC_CS6ABCR);
-
-	/* Touch Panel - Enable IRQ6 */
-	ctrl_outw(ctrl_inw(PORT_PZCR) & ~0xc, PORT_PZCR);
-	ctrl_outw((ctrl_inw(PORT_PSELA) | 0x8000), PORT_PSELA);
-	ctrl_outw((ctrl_inw(PORT_HIZCRC) & ~0x4000), PORT_HIZCRC);
-
-#ifdef CONFIG_SH_MIGOR_RTA_WVGA
-	/* LCDC - WVGA - Enable RGB Interface signals */
-	ctrl_outw(ctrl_inw(PORT_PACR) & ~0x0003, PORT_PACR);
-	ctrl_outw(0x0000, PORT_PHCR);
-	ctrl_outw(0x0000, PORT_PLCR);
-	ctrl_outw(0x0000, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x000f, PORT_PRCR);
-	ctrl_outw((ctrl_inw(PORT_PSELD) & ~0x000d) | 0x0400, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) & ~0x0100, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01e0, PORT_HIZCRA);
-#endif
-#ifdef CONFIG_SH_MIGOR_QVGA
-	/* LCDC - QVGA - Enable SYS Interface signals */
-	ctrl_outw(ctrl_inw(PORT_PACR) & ~0x0003, PORT_PACR);
-	ctrl_outw((ctrl_inw(PORT_PHCR) & ~0xcfff) | 0x0010, PORT_PHCR);
-	ctrl_outw(0x0000, PORT_PLCR);
-	ctrl_outw(0x0000, PORT_PMCR);
-	ctrl_outw(ctrl_inw(PORT_PRCR) & ~0x030f, PORT_PRCR);
-	ctrl_outw((ctrl_inw(PORT_PSELD) & ~0x0001) | 0x0420, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x0100, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x01e0, PORT_HIZCRA);
-#endif
-
-	/* CEU */
-	ctrl_outw((ctrl_inw(PORT_PTCR) & ~0x03c3) | 0x0051, PORT_PTCR);
-	ctrl_outw(ctrl_inw(PORT_PUCR) & ~0x03ff, PORT_PUCR);
-	ctrl_outw(ctrl_inw(PORT_PVCR) & ~0x03ff, PORT_PVCR);
-	ctrl_outw(ctrl_inw(PORT_PWCR) & ~0x3c00, PORT_PWCR);
-	ctrl_outw(ctrl_inw(PORT_PSELC) | 0x0001, PORT_PSELC);
-	ctrl_outw(ctrl_inw(PORT_PSELD) & ~0x2000, PORT_PSELD);
-	ctrl_outw(ctrl_inw(PORT_PSELE) | 0x000f, PORT_PSELE);
-	ctrl_outw(ctrl_inw(PORT_MSELCRB) | 0x2200, PORT_MSELCRB);
-	ctrl_outw(ctrl_inw(PORT_HIZCRA) & ~0x0a00, PORT_HIZCRA);
-	ctrl_outw(ctrl_inw(PORT_HIZCRB) & ~0x0003, PORT_HIZCRB);
 }
 
 static struct sh_machine_vector mv_migor __initmv = {
diff --git a/arch/sh/boards/mach-r2d/irq.c b/arch/sh/boards/mach-r2d/irq.c
index 8e49f6e5124..c70fecedcac 100644
--- a/arch/sh/boards/mach-r2d/irq.c
+++ b/arch/sh/boards/mach-r2d/irq.c
@@ -13,7 +13,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 
 #define R2D_NR_IRL 13
 
diff --git a/arch/sh/boards/mach-r2d/setup.c b/arch/sh/boards/mach-r2d/setup.c
index 2308e8753bc..c585be00956 100644
--- a/arch/sh/boards/mach-r2d/setup.c
+++ b/arch/sh/boards/mach-r2d/setup.c
@@ -18,7 +18,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <asm/machvec.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 #include <asm/io.h>
 #include <asm/io_trapped.h>
 #include <asm/spi.h>
diff --git a/arch/sh/boards/mach-sdk7780/irq.c b/arch/sh/boards/mach-sdk7780/irq.c
index 87cdc578f6f..855558163c5 100644
--- a/arch/sh/boards/mach-sdk7780/irq.c
+++ b/arch/sh/boards/mach-sdk7780/irq.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/boards/mach-sdk7780/setup.c b/arch/sh/boards/mach-sdk7780/setup.c
index acc5932587f..aad94a78dc7 100644
--- a/arch/sh/boards/mach-sdk7780/setup.c
+++ b/arch/sh/boards/mach-sdk7780/setup.c
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
 #include <asm/machvec.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 #include <asm/heartbeat.h>
 #include <asm/io.h>
 #include <asm/addrspace.h>
diff --git a/arch/sh/boards/mach-sh7763rdp/irq.c b/arch/sh/boards/mach-sh7763rdp/irq.c
index fd850bad2de..d8ebfa7d8c7 100644
--- a/arch/sh/boards/mach-sh7763rdp/irq.c
+++ b/arch/sh/boards/mach-sh7763rdp/irq.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/sh7763rdp.h>
+#include <mach/sh7763rdp.h>
 
 #define INTC_BASE		(0xFFD00000)
 #define INTC_INT2PRI7   (INTC_BASE+0x4001C)
diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c
index 23850da05e3..6f926fd2162 100644
--- a/arch/sh/boards/mach-sh7763rdp/setup.c
+++ b/arch/sh/boards/mach-sh7763rdp/setup.c
@@ -17,7 +17,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/fb.h>
 #include <linux/io.h>
-#include <asm/sh7763rdp.h>
+#include <mach/sh7763rdp.h>
 #include <asm/sh_eth.h>
 #include <asm/sh7760fb.h>
 
diff --git a/arch/sh/boards/mach-snapgear/setup.c b/arch/sh/boards/mach-snapgear/setup.c
index a5e349d3dda..a3277a23cf1 100644
--- a/arch/sh/boards/mach-snapgear/setup.c
+++ b/arch/sh/boards/mach-snapgear/setup.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/machvec.h>
-#include <asm/snapgear.h>
+#include <mach/snapgear.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <cpu/timer.h>
diff --git a/arch/sh/boards/mach-systemh/io.c b/arch/sh/boards/mach-systemh/io.c
index 1b767e1a142..dec3db0ee93 100644
--- a/arch/sh/boards/mach-systemh/io.c
+++ b/arch/sh/boards/mach-systemh/io.c
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 #include <asm/addrspace.h>
 #include <asm/io.h>
 
diff --git a/arch/sh/boards/mach-systemh/irq.c b/arch/sh/boards/mach-systemh/irq.c
index 601c9c8cdbe..538406872a8 100644
--- a/arch/sh/boards/mach-systemh/irq.c
+++ b/arch/sh/boards/mach-systemh/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/io.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 #include <asm/smc37c93x.h>
 
 /* address of external interrupt mask register
diff --git a/arch/sh/boards/mach-systemh/setup.c b/arch/sh/boards/mach-systemh/setup.c
index ee78af84277..219fd800a43 100644
--- a/arch/sh/boards/mach-systemh/setup.c
+++ b/arch/sh/boards/mach-systemh/setup.c
@@ -16,7 +16,7 @@
  */
 #include <linux/init.h>
 #include <asm/machvec.h>
-#include <asm/systemh7751.h>
+#include <mach/systemh7751.h>
 
 extern void make_systemh_irq(unsigned int irq);
 
diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c
index 4730c1dd697..4badad4c6f3 100644
--- a/arch/sh/boards/mach-titan/io.c
+++ b/arch/sh/boards/mach-titan/io.c
@@ -4,7 +4,7 @@
 #include <linux/pci.h>
 #include <asm/machvec.h>
 #include <asm/addrspace.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include <asm/io.h>
 
 static inline unsigned int port2adr(unsigned int port)
diff --git a/arch/sh/boards/mach-titan/setup.c b/arch/sh/boards/mach-titan/setup.c
index 5de3b2ad71a..81e7e0f0386 100644
--- a/arch/sh/boards/mach-titan/setup.c
+++ b/arch/sh/boards/mach-titan/setup.c
@@ -9,7 +9,7 @@
  */
 #include <linux/init.h>
 #include <linux/irq.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include <asm/io.h>
 
 static void __init init_titan_irq(void)
diff --git a/arch/sh/boot/.gitignore b/arch/sh/boot/.gitignore
index b6718de2369..aad5edddf93 100644
--- a/arch/sh/boot/.gitignore
+++ b/arch/sh/boot/.gitignore
@@ -1 +1,4 @@
 zImage
+vmlinux.srec
+uImage
+uImage.srec
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index 5b54965eef9..c16ccd4bfa1 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -33,10 +33,16 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE
 $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
+ifeq ($(CONFIG_32BIT),y)
+KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
+		     $$[$(CONFIG_PAGE_OFFSET)  + \
+			$(CONFIG_ZERO_PAGE_OFFSET)]')
+else
 KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
 			$(CONFIG_MEMORY_START) + \
 			$(CONFIG_ZERO_PAGE_OFFSET)]')
+endif
 
 KERNEL_ENTRY	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
index 47685f618ae..301e6d50325 100644
--- a/arch/sh/boot/compressed/Makefile_32
+++ b/arch/sh/boot/compressed/Makefile_32
@@ -23,6 +23,11 @@ IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
 
 LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
+ifeq ($(CONFIG_FTRACE),y)
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+endif
+
 LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup -T $(obj)/../../kernel/vmlinux.lds
 
 $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index f386997e4d9..efdba6b2957 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -191,7 +191,7 @@ long* stack_start = &user_stack[STACK_SIZE];
 
 void decompress_kernel(void)
 {
-	output_data = 0;
+	output_data = NULL;
 	output_ptr = PHYSADDR((unsigned long)&_text+PAGE_SIZE);
 #ifdef CONFIG_29BIT
 	output_ptr |= P2SEG;
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
new file mode 100644
index 00000000000..bef07fa8d85
--- /dev/null
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -0,0 +1,1050 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26
+# Tue Aug 26 11:36:09 2008
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION="_edosk7760"
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_CLASSIC_RCU=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+CONFIG_CPU_SUBTYPE_SH7760=y
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+# CONFIG_CPU_SUBTYPE_SH5_101 is not set
+# CONFIG_CPU_SUBTYPE_SH5_103 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x0c000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+
+#
+# Cache configuration
+#
+# CONFIG_SH_DIRECT_MAPPED is not set
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+CONFIG_SH_STORE_QUEUES=y
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+CONFIG_SH_EDOSK7760=y
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=33333333
+CONFIG_TICK_ONESHOT=y
+# CONFIG_NO_HZ is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+CONFIG_SH_DMA_API=y
+CONFIG_SH_DMA=y
+CONFIG_NR_ONCHIP_DMA_CHANNELS=4
+# CONFIG_NR_DMA_CHANNELS_BOOL is not set
+# CONFIG_SH_DMABRG is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+# CONFIG_PREEMPT_RCU is not set
+CONFIG_GUSA=y
+# CONFIG_GUSA_RB is not set
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x02000000
+# CONFIG_UBC_WAKEUP is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="mem=64M console=ttySC2,115200 root=/dev/nfs rw nfsroot=192.168.0.3:/scripts/filesys ip=192.168.0.4"
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+CONFIG_DEBUG_DRIVER=y
+CONFIG_DEBUG_DEVRES=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+CONFIG_MTD_DEBUG=y
+CONFIG_MTD_DEBUG_VERBOSE=0
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_GEN_PROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+CONFIG_MTD_CFI_GEOMETRY=y
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+CONFIG_MTD_MAP_BANK_WIDTH_8=y
+CONFIG_MTD_MAP_BANK_WIDTH_16=y
+CONFIG_MTD_MAP_BANK_WIDTH_32=y
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+CONFIG_MTD_CFI_I4=y
+CONFIG_MTD_CFI_I8=y
+# CONFIG_MTD_OTP is not set
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_CFI_UTIL=y
+CONFIG_MTD_RAM=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_ABSENT=y
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_START=0xffffffff
+CONFIG_MTD_PHYSMAP_LEN=0x0
+CONFIG_MTD_PHYSMAP_BANKWIDTH=4
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=26000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_STNIC is not set
+# CONFIG_SMC9194 is not set
+CONFIG_SMC91X=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=3
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_SIMTEC is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
+CONFIG_I2C_SH7760=y
+# CONFIG_I2C_SH_MOBILE is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+CONFIG_I2C_DEBUG_CORE=y
+CONFIG_I2C_DEBUG_ALGO=y
+CONFIG_I2C_DEBUG_BUS=y
+CONFIG_I2C_DEBUG_CHIP=y
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=m
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=m
+CONFIG_FB_CFB_COPYAREA=m
+CONFIG_FB_CFB_IMAGEBLIT=m
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+CONFIG_FB_TILEBLITTING=y
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_MB86290_640X480_16BPP is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE is not set
+# CONFIG_LOGO is not set
+
+#
+# Sound
+#
+CONFIG_SOUND=y
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=y
+CONFIG_SND_TIMER=y
+CONFIG_SND_PCM=y
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_VERBOSE_PRINTK=y
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# SUPERH devices
+#
+
+#
+# System on Chip audio support
+#
+CONFIG_SND_SOC=y
+
+#
+# SoC Audio support for SuperH
+#
+
+#
+# ALSA SoC audio for Freescale SOCs
+#
+
+#
+# SoC Audio for the Texas Instruments OMAP
+#
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+# CONFIG_EXT2_FS_POSIX_ACL is not set
+# CONFIG_EXT2_FS_SECURITY is not set
+CONFIG_EXT2_FS_XIP=y
+CONFIG_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4DEV_FS is not set
+CONFIG_JBD=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+CONFIG_GENERIC_ACL=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+CONFIG_NLS_ISO8859_15=y
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=y
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_UNUSED_SYMBOLS=y
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+CONFIG_TIMER_STATS=y
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+CONFIG_DEBUG_PREEMPT=y
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_SH_STANDARD_BIOS is not set
+CONFIG_EARLY_SCIF_CONSOLE=y
+CONFIG_EARLY_SCIF_CONSOLE_PORT=0xffe80000
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_BOOTMEM is not set
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_SH_KGDB is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/sh/configs/rts7751r2dplus_qemu_defconfig b/arch/sh/configs/rts7751r2dplus_qemu_defconfig
new file mode 100644
index 00000000000..a72796c0293
--- /dev/null
+++ b/arch/sh/configs/rts7751r2dplus_qemu_defconfig
@@ -0,0 +1,909 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.27-rc2
+# Mon Aug 18 22:17:44 2008
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_SYS_SUPPORTS_PCI=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_IO_TRAPPED=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set
+# CONFIG_HAVE_IOREMAP_PROT is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_ARCH_TRACEHOOK is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+# CONFIG_USE_GENERIC_SMP_HELPERS is not set
+CONFIG_HAVE_CLK=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+# CONFIG_MODULE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_CLASSIC_RCU=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+CONFIG_CPU_SUBTYPE_SH7751R=y
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+# CONFIG_CPU_SUBTYPE_SH5_101 is not set
+# CONFIG_CPU_SUBTYPE_SH5_103 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x0c000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+
+#
+# Cache configuration
+#
+# CONFIG_SH_DIRECT_MAPPED is not set
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+# CONFIG_SH_7751_SYSTEMH is not set
+# CONFIG_SH_SECUREEDGE5410 is not set
+CONFIG_SH_RTS7751R2D=y
+# CONFIG_SH_LANDISK is not set
+# CONFIG_SH_TITAN is not set
+# CONFIG_SH_LBOX_RE2 is not set
+
+#
+# RTS7751R2D Board Revision
+#
+CONFIG_RTS7751R2D_PLUS=y
+# CONFIG_RTS7751R2D_1 is not set
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=60000000
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+CONFIG_HEARTBEAT=y
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_SECCOMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+# CONFIG_GUSA_RB is not set
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00010000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+# CONFIG_UBC_WAKEUP is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=serial"
+
+#
+# Bus options
+#
+# CONFIG_PCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+CONFIG_SATA_PMP=y
+CONFIG_ATA_SFF=y
+# CONFIG_SATA_MV is not set
+# CONFIG_PATA_PLATFORM is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_CONSOLE is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=1
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=y
+# CONFIG_SPI_SH_SCI is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_AT25 is not set
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM70 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+CONFIG_MFD_SM501=y
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=m
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_SH_MOBILE_LCDC=m
+CONFIG_FB_SM501=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+# CONFIG_LOGO_SUPERH_VGA16 is not set
+CONFIG_LOGO_SUPERH_CLUT224=y
+CONFIG_SOUND=y
+CONFIG_SND=m
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+CONFIG_SND_SUPPORT_OLD_API=y
+CONFIG_SND_VERBOSE_PROCFS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_DRIVERS=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+CONFIG_SND_SPI=y
+CONFIG_SND_SUPERH=y
+# CONFIG_SND_SOC is not set
+CONFIG_SOUND_PRIME=m
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+CONFIG_RTC_DRV_R9701=y
+# CONFIG_RTC_DRV_RS5C348 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_RTC_DRV_SH is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+CONFIG_MINIX_FS=y
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+CONFIG_NLS_CODEPAGE_932=y
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_SH_STANDARD_BIOS is not set
+CONFIG_EARLY_SCIF_CONSOLE=y
+CONFIG_EARLY_SCIF_CONSOLE_PORT=0xffe80000
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_BOOTMEM is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_SH_KGDB is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index a13cb764b0b..86c0b6fb737 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/lboxre2.h>
+#include <mach/lboxre2.h>
 #include "pci-sh4.h"
 
 static char lboxre2_irq_tab[] __initdata = {
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index 5fdadaeed6f..8555238e63e 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <asm/r7780rp.h>
+#include <mach/highlander.h>
 #include <asm/io.h>
 #include "pci-sh4.h"
 
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index b3fa3e2ef18..d6ca74b25d5 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/rts7751r2d.h>
+#include <mach/r2d.h>
 #include "pci-sh4.h"
 
 static u8 rts7751r2d_irq_tab[] __initdata = {
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 66a9b4047f2..4dcc64184b2 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <asm/sdk7780.h>
+#include <mach/sdk7780.h>
 #include <asm/io.h>
 #include "pci-sh4.h"
 
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index ac8ee2312cd..a8f7801a34a 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/io.h>
-#include <asm/titan.h>
+#include <mach/titan.h>
 #include "pci-sh4.h"
 
 static char titan_irq_tab[] __initdata = {
diff --git a/arch/sh/include/asm/bitops-llsc.h b/arch/sh/include/asm/bitops-llsc.h
new file mode 100644
index 00000000000..43b8e1a8239
--- /dev/null
+++ b/arch/sh/include/asm/bitops-llsc.h
@@ -0,0 +1,144 @@
+#ifndef __ASM_SH_BITOPS_LLSC_H
+#define __ASM_SH_BITOPS_LLSC_H
+
+static inline void set_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Set bit nr of the bitmap at addr using a movli.l/movco.l retry loop. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! set_bit		\n\t"	/* tmp = *a (linked load) */
+		"or		%3, %0				\n\t"	/* tmp |= mask */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		: "=&z" (tmp), "=r" (a)		/* "z" places tmp in r0 */
+		: "1" (a), "r" (mask)		/* "1" ties the input a to output operand %1 */
+		: "t", "memory"
+	);
+}
+
+static inline void clear_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Clear bit nr of the bitmap at addr using a movli.l/movco.l retry loop. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! clear_bit		\n\t"	/* tmp = *a (linked load) */
+		"and		%3, %0				\n\t"	/* tmp &= ~mask (operand %3 is the inverted mask) */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		: "=&z" (tmp), "=r" (a)		/* "z" places tmp in r0 */
+		: "1" (a), "r" (~mask)		/* the mask is inverted in C before entering the asm */
+		: "t", "memory"
+	);
+}
+
+static inline void change_bit(int nr, volatile void * addr)
+{
+	int	mask;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Toggle bit nr of the bitmap at addr using a movli.l/movco.l retry loop. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! change_bit		\n\t"	/* tmp = *a (linked load) */
+		"xor		%3, %0				\n\t"	/* tmp ^= mask */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		: "=&z" (tmp), "=r" (a)		/* "z" places tmp in r0 */
+		: "1" (a), "r" (mask)		/* "1" ties the input a to output operand %1 */
+		: "t", "memory"
+	);
+}
+
+static inline int test_and_set_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Set bit nr at addr via movli.l/movco.l; returns 1 if it was set before, else 0. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_set_bit	\n\t"	/* tmp = *a (linked load) */
+		"mov		%0, %2				\n\t"	/* retval = previous word value */
+		"or		%4, %0				\n\t"	/* tmp |= mask */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		"and		%4, %2				\n\t"	/* retval &= mask: keep only the old state of bit nr */
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (a), "r" (mask)	/* NOTE(review): no trailing synco here, unlike test_and_clear_bit/test_and_change_bit - confirm */
+		: "t", "memory"
+	);
+
+	return retval != 0;	/* normalise the masked old bit to 0 or 1 */
+}
+
+static inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Clear bit nr at addr via movli.l/movco.l; returns 1 if it was set before, else 0. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_clear_bit	\n\t"	/* tmp = *a (linked load) */
+		"mov		%0, %2				\n\t"	/* retval = previous word value */
+		"and		%5, %0				\n\t"	/* tmp &= ~mask (operand %5) */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		"and		%4, %2				\n\t"	/* retval &= mask (operand %4): old state of bit nr */
+		"synco						\n\t"	/* SH-4A synchronisation instruction issued after the update */
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (a), "r" (mask), "r" (~mask)	/* both the mask and its inverse are passed in */
+		: "t", "memory"
+	);
+
+	return retval != 0;	/* normalise the masked old bit to 0 or 1 */
+}
+
+static inline int test_and_change_bit(int nr, volatile void * addr)
+{
+	int	mask, retval;
+	volatile unsigned int *a = addr;
+	unsigned long tmp;
+	/* Toggle bit nr at addr via movli.l/movco.l; returns 1 if it was set before, else 0. */
+	a += nr >> 5;			/* advance to the 32-bit word that holds bit nr */
+	mask = 1 << (nr & 0x1f);	/* single-bit mask inside that word */
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! test_and_change_bit	\n\t"	/* tmp = *a (linked load) */
+		"mov		%0, %2				\n\t"	/* retval = previous word value */
+		"xor		%4, %0				\n\t"	/* tmp ^= mask */
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp to *a */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		"and		%4, %2				\n\t"	/* retval &= mask: keep only the old state of bit nr */
+		"synco						\n\t"	/* SH-4A synchronisation instruction issued after the update */
+		: "=&z" (tmp), "=r" (a), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (a), "r" (mask)	/* "1" ties the input a to output operand %1 */
+		: "t", "memory"
+	);
+
+	return retval != 0;	/* normalise the masked old bit to 0 or 1 */
+}
+
+#endif /* __ASM_SH_BITOPS_LLSC_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index d7d382f63ee..367930d8e5a 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_GUSA_RB
 #include <asm/bitops-grb.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/bitops-llsc.h>
 #else
 #include <asm/bitops-irq.h>
 #endif
diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 720dfab7b15..f9c88583d90 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -39,6 +39,7 @@ struct clk {
 
 /* Should be defined by processor-specific code */
 void arch_init_clk_ops(struct clk_ops **, int type);
+int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
 int clk_init(void);
diff --git a/arch/sh/include/asm/cmpxchg-llsc.h b/arch/sh/include/asm/cmpxchg-llsc.h
new file mode 100644
index 00000000000..aee3bf28658
--- /dev/null
+++ b/arch/sh/include/asm/cmpxchg-llsc.h
@@ -0,0 +1,71 @@
+#ifndef __ASM_SH_CMPXCHG_LLSC_H
+#define __ASM_SH_CMPXCHG_LLSC_H
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long retval;
+	unsigned long tmp;
+	/* Store val to *m with a movli.l/movco.l retry loop and return the value read from *m. */
+	__asm__ __volatile__ (
+		"1:					\n\t"
+		"movli.l	@%1, %0	! xchg_u32	\n\t"	/* tmp = *m (linked load) */
+		"mov		%0, %2			\n\t"	/* retval = previous contents */
+		"mov		%4, %0			\n\t"	/* tmp = val */
+		"movco.l	%0, @%1			\n\t"	/* conditional store of tmp to *m */
+		"bf		1b			\n\t"	/* T clear: the store did not take effect, retry */
+		"synco					\n\t"	/* SH-4A synchronisation instruction issued after the exchange */
+		: "=&z"(tmp), "=r" (m), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (m), "r" (val)	/* "1" ties the input m to output operand %1 */
+		: "t", "memory"
+	);
+
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long retval;
+	unsigned long tmp;
+	/* Store the low byte of val at m via movli.l/movco.l and return the value that was loaded. */
+	__asm__ __volatile__ (
+		"1:					\n\t"
+		"movli.l	@%1, %0	! xchg_u8	\n\t"	/* NOTE(review): .l is a long-word access but m is a u8 pointer - confirm */
+		"mov		%0, %2			\n\t"	/* retval = loaded value; NOTE(review): not masked to 8 bits - confirm */
+		"mov		%4, %0			\n\t"	/* tmp = val & 0xff */
+		"movco.l	%0, @%1			\n\t"	/* conditional long-word store of tmp at m */
+		"bf		1b			\n\t"	/* T clear: the store did not take effect, retry */
+		"synco					\n\t"	/* SH-4A synchronisation instruction issued after the exchange */
+		: "=&z"(tmp), "=r" (m), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (m), "r" (val & 0xff)	/* only the low 8 bits of val are passed in */
+		: "t", "memory"
+	);
+
+	return retval;
+}
+
+static inline unsigned long
+__cmpxchg_u32(volatile int *m, unsigned long old, unsigned long new)
+{
+	unsigned long retval;
+	unsigned long tmp;
+	/* If *m equals old, replace it with new; the value read from *m is returned either way. */
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __cmpxchg_u32		\n\t"	/* tmp = *m (linked load) */
+		"mov		%0, %2				\n\t"	/* retval = value read */
+		"cmp/eq		%2, %4				\n\t"	/* T = (retval == old) */
+		"bf		2f				\n\t"	/* mismatch: keep tmp as loaded, skip the replacement */
+		"mov		%5, %0				\n\t"	/* match: tmp = new */
+		"2:						\n\t"
+		"movco.l	%0, @%1				\n\t"	/* conditional store of tmp (new, or the unchanged value) */
+		"bf		1b				\n\t"	/* T clear: the store did not take effect, retry */
+		"synco						\n\t"	/* SH-4A synchronisation instruction issued after the update */
+		: "=&z" (tmp), "=r" (m), "=&r" (retval)	/* "z" places tmp in r0 */
+		: "1" (m), "r" (old), "r" (new)	/* operands %4 and %5 are old and new */
+		: "t", "memory"
+	);
+
+	return retval;	/* callers compare this with old to learn whether the swap happened */
+}
+
+#endif /* __ASM_SH_CMPXCHG_LLSC_H */
diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index ee02db110f0..9eb9036a1bd 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -108,6 +108,14 @@ typedef struct user_fpu_struct elf_fpregset_t;
 #define elf_check_fdpic(x)		((x)->e_flags & EF_SH_FDPIC)
 #define elf_check_const_displacement(x)	((x)->e_flags & EF_SH_PIC)
 
+#ifdef CONFIG_SUPERH32
+/*
+ * Enable dump using regset.
+ * This covers all of general/DSP/FPU regs.
+ */
+#define CORE_DUMP_USE_REGSET
+#endif
+
 #define USE_ELF_CORE_DUMP
 #define ELF_FDPIC_CORE_EFLAGS	EF_SH_FDPIC
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
@@ -190,12 +198,6 @@ do {									\
 #endif
 
 #define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT)
-struct task_struct;
-extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
-extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
-#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 
 #ifdef CONFIG_VSYSCALL
 /* vDSO has arch_setup_additional_pages */
diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h
index 91462fea150..1d3aee04b5c 100644
--- a/arch/sh/include/asm/fpu.h
+++ b/arch/sh/include/asm/fpu.h
@@ -30,8 +30,15 @@ static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
 }
 #endif
 
+struct user_regset;
+
 extern int do_fpu_inst(unsigned short, struct pt_regs *);
 
+extern int fpregs_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf);
+
 static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
 {
 	preempt_disable();
@@ -50,6 +57,18 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
 	preempt_enable();
 }
 
+static inline int init_fpu(struct task_struct *tsk)
+{
+	if (tsk_used_math(tsk)) {	/* tsk is already flagged as having used math */
+		if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)	/* only for the running task on a CPU with an FPU */
+			unlazy_fpu(tsk, task_pt_regs(tsk));	/* presumably writes live FPU state back to tsk - confirm against unlazy_fpu() */
+		return 0;
+	}
+	/* First use: only flag tsk as having used math. */
+	set_stopped_child_used_math(tsk);
+	return 0;	/* every path returns 0 */
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_SH_FPU_H */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
new file mode 100644
index 00000000000..3aed362c946
--- /dev/null
+++ b/arch/sh/include/asm/ftrace.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_SH_FTRACE_H
+#define __ASM_SH_FTRACE_H
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif /* __ASM_SH_FTRACE_H */
diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h
index cf32bd2df88..9650e7c9c39 100644
--- a/arch/sh/include/asm/gpio.h
+++ b/arch/sh/include/asm/gpio.h
@@ -1,9 +1,9 @@
 /*
  *  include/asm-sh/gpio.h
  *
- *  Copyright (C) 2007 Markus Brunner, Mark Jonas
+ * Generic GPIO API and pinmux table support for SuperH.
  *
- *  Addresses for the Pin Function Controller
+ * Copyright (c) 2008 Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -16,4 +16,92 @@
 #include <cpu/gpio.h>
 #endif
 
+typedef unsigned short pinmux_enum_t;
+typedef unsigned char pinmux_flag_t;
+
+#define PINMUX_TYPE_NONE            0
+#define PINMUX_TYPE_FUNCTION        1
+#define PINMUX_TYPE_GPIO            2
+#define PINMUX_TYPE_OUTPUT          3
+#define PINMUX_TYPE_INPUT           4
+#define PINMUX_TYPE_INPUT_PULLUP    5
+#define PINMUX_TYPE_INPUT_PULLDOWN  6
+
+#define PINMUX_FLAG_TYPE            (0x7)
+#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
+#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
+
+struct pinmux_gpio {
+	pinmux_enum_t enum_id;
+	pinmux_flag_t flags;
+};
+
+#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
+#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
+
+struct pinmux_cfg_reg {
+	unsigned long reg, reg_width, field_width;
+	unsigned long *cnt;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_CFG_REG(name, r, r_width, f_width) \
+	.reg = r, .reg_width = r_width, .field_width = f_width,		\
+	.cnt = (unsigned long [r_width / f_width]) {}, \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+
+struct pinmux_data_reg {
+	unsigned long reg, reg_width;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_DATA_REG(name, r, r_width) \
+	.reg = r, .reg_width = r_width,	\
+	.enum_ids = (pinmux_enum_t [r_width]) \
+
+struct pinmux_range {
+	pinmux_enum_t begin;
+	pinmux_enum_t end;
+};
+
+struct pinmux_info {
+	char *name;
+	pinmux_enum_t reserved_id;
+	struct pinmux_range data;
+	struct pinmux_range input;
+	struct pinmux_range input_pd;
+	struct pinmux_range input_pu;
+	struct pinmux_range output;
+	struct pinmux_range mark;
+	struct pinmux_range function;
+
+	unsigned first_gpio, last_gpio;
+
+	struct pinmux_gpio *gpios;
+	struct pinmux_cfg_reg *cfg_regs;
+	struct pinmux_data_reg *data_regs;
+
+	pinmux_enum_t *gpio_data;
+	unsigned int gpio_data_size;
+
+	unsigned long *gpio_in_use;
+};
+
+int register_pinmux(struct pinmux_info *pip);
+
+int __gpio_request(unsigned gpio);
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+	return __gpio_request(gpio);	/* thin wrapper: label is accepted but not passed on */
+}
+void gpio_free(unsigned gpio);
+int gpio_direction_input(unsigned gpio);
+int gpio_direction_output(unsigned gpio, int value);
+int gpio_get_value(unsigned gpio);
+void gpio_set_value(unsigned gpio, int value);
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return 0;	/* stub: both arguments are ignored and 0 is always returned */
+}
+
 #endif /* __ASM_SH_GPIO_H */
diff --git a/arch/sh/include/asm/hw_irq.h b/arch/sh/include/asm/hw_irq.h
index d557b00111b..603cdde813d 100644
--- a/arch/sh/include/asm/hw_irq.h
+++ b/arch/sh/include/asm/hw_irq.h
@@ -2,6 +2,7 @@
 #define __ASM_SH_HW_IRQ_H
 
 #include <linux/init.h>
+#include <linux/sh_intc.h>
 #include <asm/atomic.h>
 
 extern atomic_t irq_err_count;
@@ -23,101 +24,12 @@ struct ipr_desc {
 
 void register_ipr_controller(struct ipr_desc *);
 
-typedef unsigned char intc_enum;
-
-struct intc_vect {
-	intc_enum enum_id;
-	unsigned short vect;
-};
-
-#define INTC_VECT(enum_id, vect) { enum_id, vect }
-#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq))
-
-struct intc_group {
-	intc_enum enum_id;
-	intc_enum enum_ids[32];
-};
-
-#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } }
-
-struct intc_mask_reg {
-	unsigned long set_reg, clr_reg, reg_width;
-	intc_enum enum_ids[32];
-#ifdef CONFIG_SMP
-	unsigned long smp;
-#endif
-};
-
-struct intc_prio_reg {
-	unsigned long set_reg, clr_reg, reg_width, field_width;
-	intc_enum enum_ids[16];
-#ifdef CONFIG_SMP
-	unsigned long smp;
-#endif
-};
-
-struct intc_sense_reg {
-	unsigned long reg, reg_width, field_width;
-	intc_enum enum_ids[16];
-};
-
-#ifdef CONFIG_SMP
-#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
-#else
-#define INTC_SMP(stride, nr)
-#endif
-
-struct intc_desc {
-	struct intc_vect *vectors;
-	unsigned int nr_vectors;
-	struct intc_group *groups;
-	unsigned int nr_groups;
-	struct intc_mask_reg *mask_regs;
-	unsigned int nr_mask_regs;
-	struct intc_prio_reg *prio_regs;
-	unsigned int nr_prio_regs;
-	struct intc_sense_reg *sense_regs;
-	unsigned int nr_sense_regs;
-	char *name;
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
-	struct intc_mask_reg *ack_regs;
-	unsigned int nr_ack_regs;
-#endif
-};
-
-#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
-#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups,		\
-	mask_regs, prio_regs, sense_regs)				\
-struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
-}
-
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
-#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
-	mask_regs, prio_regs, sense_regs, ack_regs)			\
-struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
-	_INTC_ARRAY(ack_regs),						\
-}
-#endif
-
-void __init register_intc_controller(struct intc_desc *desc);
-int intc_set_priority(unsigned int irq, unsigned int prio);
-
 void __init plat_irq_setup(void);
-#ifdef CONFIG_CPU_SH3
 void __init plat_irq_setup_sh3(void);
-#endif
+void __init plat_irq_setup_pins(int mode);
 
 enum { IRQ_MODE_IRQ, IRQ_MODE_IRQ7654, IRQ_MODE_IRQ3210,
        IRQ_MODE_IRL7654_MASK, IRQ_MODE_IRL3210_MASK,
        IRQ_MODE_IRL7654, IRQ_MODE_IRL3210 };
-void __init plat_irq_setup_pins(int mode);
 
 #endif /* __ASM_SH_HW_IRQ_H */
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index a4fbf0c84fb..436c2853957 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -1,27 +1,26 @@
 #ifndef __ASM_SH_IO_H
 #define __ASM_SH_IO_H
-
 /*
  * Convention:
- *    read{b,w,l}/write{b,w,l} are for PCI,
+ *    read{b,w,l,q}/write{b,w,l,q} are for PCI,
  *    while in{b,w,l}/out{b,w,l} are for ISA
- * These may (will) be platform specific function.
+ *
  * In addition we have 'pausing' versions: in{b,w,l}_p/out{b,w,l}_p
  * and 'string' versions: ins{b,w,l}/outs{b,w,l}
- * For read{b,w,l} and write{b,w,l} there are also __raw versions, which
- * do not have a memory barrier after them.
  *
- * In addition, we have
- *   ctrl_in{b,w,l}/ctrl_out{b,w,l} for SuperH specific I/O.
- *   which are processor specific.
- */
-
-/*
- * We follow the Alpha convention here:
- *  __inb expands to an inline function call (which calls via the mv)
- *  _inb  is a real function call (note ___raw fns are _ version of __raw)
- *  inb   by default expands to _inb, but the machine specific code may
- *        define it to __inb if it chooses.
+ * While read{b,w,l,q} and write{b,w,l,q} contain memory barriers
+ * automatically, there are also __raw versions, which do not.
+ *
+ * Historically, we have also had ctrl_in{b,w,l,q}/ctrl_out{b,w,l,q} for
+ * SuperH specific I/O (raw I/O to on-chip CPU peripherals). In practice
+ * these have the same semantics as the __raw variants, and as such, all
+ * new code should be using the __raw versions.
+ *
+ * All ISA I/O routines are wrapped through the machine vector. If a
+ * board does not provide overrides, a generic set that are copied in
+ * from the default machine vector are used instead. These are largely
+ * for old compat code for I/O offsetting to SuperIOs, all of which are
+ * better handled through the machvec ioport mapping routines these days.
  */
 #include <asm/cache.h>
 #include <asm/system.h>
@@ -31,7 +30,6 @@
 #include <asm-generic/iomap.h>
 
 #ifdef __KERNEL__
-
 /*
  * Depending on which platform we are running on, we need different
  * I/O functions.
@@ -40,105 +38,68 @@
 #include <asm/io_generic.h>
 #include <asm/io_trapped.h>
 
-#define maybebadio(port) \
-  printk(KERN_ERR "bad PC-like io %s:%u for port 0x%lx at 0x%08x\n", \
-	 __FUNCTION__, __LINE__, (port), (u32)__builtin_return_address(0))
-
-/*
- * Since boards are able to define their own set of I/O routines through
- * their respective machine vector, we always wrap through the mv.
- *
- * Also, in the event that a board hasn't provided its own definition for
- * a given routine, it will be wrapped to generic code at run-time.
- */
+#define inb(p)			sh_mv.mv_inb((p))
+#define inw(p)			sh_mv.mv_inw((p))
+#define inl(p)			sh_mv.mv_inl((p))
+#define outb(x,p)		sh_mv.mv_outb((x),(p))
+#define outw(x,p)		sh_mv.mv_outw((x),(p))
+#define outl(x,p)		sh_mv.mv_outl((x),(p))
+
+#define inb_p(p)		sh_mv.mv_inb_p((p))
+#define inw_p(p)		sh_mv.mv_inw_p((p))
+#define inl_p(p)		sh_mv.mv_inl_p((p))
+#define outb_p(x,p)		sh_mv.mv_outb_p((x),(p))
+#define outw_p(x,p)		sh_mv.mv_outw_p((x),(p))
+#define outl_p(x,p)		sh_mv.mv_outl_p((x),(p))
+
+#define insb(p,b,c)		sh_mv.mv_insb((p), (b), (c))
+#define insw(p,b,c)		sh_mv.mv_insw((p), (b), (c))
+#define insl(p,b,c)		sh_mv.mv_insl((p), (b), (c))
+#define outsb(p,b,c)		sh_mv.mv_outsb((p), (b), (c))
+#define outsw(p,b,c)		sh_mv.mv_outsw((p), (b), (c))
+#define outsl(p,b,c)		sh_mv.mv_outsl((p), (b), (c))
+
+#define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile u8  __force *)(a) = (v))
+#define __raw_writew(v,a)	(__chk_io_ptr(a), *(volatile u16 __force *)(a) = (v))
+#define __raw_writel(v,a)	(__chk_io_ptr(a), *(volatile u32 __force *)(a) = (v))
+#define __raw_writeq(v,a)	(__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
+
+#define __raw_readb(a)		(__chk_io_ptr(a), *(volatile u8  __force *)(a))
+#define __raw_readw(a)		(__chk_io_ptr(a), *(volatile u16 __force *)(a))
+#define __raw_readl(a)		(__chk_io_ptr(a), *(volatile u32 __force *)(a))
+#define __raw_readq(a)		(__chk_io_ptr(a), *(volatile u64 __force *)(a))
+
+#define readb(a)		({ u8  r_ = __raw_readb(a); mb(); r_; })
+#define readw(a)		({ u16 r_ = __raw_readw(a); mb(); r_; })
+#define readl(a)		({ u32 r_ = __raw_readl(a); mb(); r_; })
+#define readq(a)		({ u64 r_ = __raw_readq(a); mb(); r_; })
+
+#define writeb(v,a)		({ __raw_writeb((v),(a)); mb(); })
+#define writew(v,a)		({ __raw_writew((v),(a)); mb(); })
+#define writel(v,a)		({ __raw_writel((v),(a)); mb(); })
+#define writeq(v,a)		({ __raw_writeq((v),(a)); mb(); })
 
-#define __inb(p)	sh_mv.mv_inb((p))
-#define __inw(p)	sh_mv.mv_inw((p))
-#define __inl(p)	sh_mv.mv_inl((p))
-#define __outb(x,p)	sh_mv.mv_outb((x),(p))
-#define __outw(x,p)	sh_mv.mv_outw((x),(p))
-#define __outl(x,p)	sh_mv.mv_outl((x),(p))
-
-#define __inb_p(p)	sh_mv.mv_inb_p((p))
-#define __inw_p(p)	sh_mv.mv_inw_p((p))
-#define __inl_p(p)	sh_mv.mv_inl_p((p))
-#define __outb_p(x,p)	sh_mv.mv_outb_p((x),(p))
-#define __outw_p(x,p)	sh_mv.mv_outw_p((x),(p))
-#define __outl_p(x,p)	sh_mv.mv_outl_p((x),(p))
-
-#define __insb(p,b,c)	sh_mv.mv_insb((p), (b), (c))
-#define __insw(p,b,c)	sh_mv.mv_insw((p), (b), (c))
-#define __insl(p,b,c)	sh_mv.mv_insl((p), (b), (c))
-#define __outsb(p,b,c)	sh_mv.mv_outsb((p), (b), (c))
-#define __outsw(p,b,c)	sh_mv.mv_outsw((p), (b), (c))
-#define __outsl(p,b,c)	sh_mv.mv_outsl((p), (b), (c))
-
-#define __readb(a)	sh_mv.mv_readb((a))
-#define __readw(a)	sh_mv.mv_readw((a))
-#define __readl(a)	sh_mv.mv_readl((a))
-#define __writeb(v,a)	sh_mv.mv_writeb((v),(a))
-#define __writew(v,a)	sh_mv.mv_writew((v),(a))
-#define __writel(v,a)	sh_mv.mv_writel((v),(a))
-
-#define inb		__inb
-#define inw		__inw
-#define inl		__inl
-#define outb		__outb
-#define outw		__outw
-#define outl		__outl
-
-#define inb_p		__inb_p
-#define inw_p		__inw_p
-#define inl_p		__inl_p
-#define outb_p		__outb_p
-#define outw_p		__outw_p
-#define outl_p		__outl_p
-
-#define insb		__insb
-#define insw		__insw
-#define insl		__insl
-#define outsb		__outsb
-#define outsw		__outsw
-#define outsl		__outsl
-
-#define __raw_readb(a)		__readb((void __iomem *)(a))
-#define __raw_readw(a)		__readw((void __iomem *)(a))
-#define __raw_readl(a)		__readl((void __iomem *)(a))
-#define __raw_writeb(v, a)	__writeb(v, (void __iomem *)(a))
-#define __raw_writew(v, a)	__writew(v, (void __iomem *)(a))
-#define __raw_writel(v, a)	__writel(v, (void __iomem *)(a))
-
-void __raw_writesl(unsigned long addr, const void *data, int longlen);
-void __raw_readsl(unsigned long addr, void *data, int longlen);
+/* SuperH on-chip I/O functions */
+#define ctrl_inb		__raw_readb
+#define ctrl_inw		__raw_readw
+#define ctrl_inl		__raw_readl
+#define ctrl_inq		__raw_readq
 
-/*
- * The platform header files may define some of these macros to use
- * the inlined versions where appropriate.  These macros may also be
- * redefined by userlevel programs.
- */
-#ifdef __readb
-# define readb(a)	({ unsigned int r_ = __raw_readb(a); mb(); r_; })
-#endif
-#ifdef __raw_readw
-# define readw(a)	({ unsigned int r_ = __raw_readw(a); mb(); r_; })
-#endif
-#ifdef __raw_readl
-# define readl(a)	({ unsigned int r_ = __raw_readl(a); mb(); r_; })
-#endif
+#define ctrl_outb		__raw_writeb
+#define ctrl_outw		__raw_writew
+#define ctrl_outl		__raw_writel
+#define ctrl_outq		__raw_writeq
 
-#ifdef __raw_writeb
-# define writeb(v,a)	({ __raw_writeb((v),(a)); mb(); })
-#endif
-#ifdef __raw_writew
-# define writew(v,a)	({ __raw_writew((v),(a)); mb(); })
-#endif
-#ifdef __raw_writel
-# define writel(v,a)	({ __raw_writel((v),(a)); mb(); })
+static inline void ctrl_delay(void)
+{	/* Performs one discarded 16-bit read of P2SEG; empty when P2SEG is not defined. */
 #ifdef P2SEG
+	__raw_readw(P2SEG);	/* value is thrown away; only the access itself matters */
 #endif
+}
 
 #define __BUILD_MEMORY_STRING(bwlq, type)				\
 									\
-static inline void writes##bwlq(volatile void __iomem *mem,		\
+static inline void __raw_writes##bwlq(volatile void __iomem *mem,	\
 				const void *addr, unsigned int count)	\
 {									\
 	const volatile type *__addr = addr;				\
@@ -149,8 +110,8 @@ static inline void writes##bwlq(volatile void __iomem *mem,		\
 	}								\
 }									\
 									\
-static inline void reads##bwlq(volatile void __iomem *mem, void *addr,	\
-			       unsigned int count)			\
+static inline void __raw_reads##bwlq(volatile void __iomem *mem,	\
+			       void *addr, unsigned int count)		\
 {									\
 	volatile type *__addr = addr;					\
 									\
@@ -162,106 +123,71 @@ static inline void reads##bwlq(volatile void __iomem *mem, void *addr,	\
 
 __BUILD_MEMORY_STRING(b, u8)
 __BUILD_MEMORY_STRING(w, u16)
-#define writesl __raw_writesl
-#define readsl  __raw_readsl
+__BUILD_MEMORY_STRING(q, u64)
+
+void __raw_writesl(void __iomem *addr, const void *data, int longlen);
+void __raw_readsl(const void __iomem *addr, void *data, int longlen);
+
+#define writesb			__raw_writesb
+#define writesw			__raw_writesw
+#define writesl			__raw_writesl
+
+#define readsb			__raw_readsb
+#define readsw			__raw_readsw
+#define readsl			__raw_readsl
 
-#define readb_relaxed(a) readb(a)
-#define readw_relaxed(a) readw(a)
-#define readl_relaxed(a) readl(a)
+#define readb_relaxed(a)	readb(a)
+#define readw_relaxed(a)	readw(a)
+#define readl_relaxed(a)	readl(a)
+#define readq_relaxed(a)	readq(a)
 
 /* Simple MMIO */
-#define ioread8(a)		readb(a)
-#define ioread16(a)		readw(a)
+#define ioread8(a)		__raw_readb(a)
+#define ioread16(a)		__raw_readw(a)
 #define ioread16be(a)		be16_to_cpu(__raw_readw((a)))
-#define ioread32(a)		readl(a)
+#define ioread32(a)		__raw_readl(a)
 #define ioread32be(a)		be32_to_cpu(__raw_readl((a)))
 
-#define iowrite8(v,a)		writeb((v),(a))
-#define iowrite16(v,a)		writew((v),(a))
+#define iowrite8(v,a)		__raw_writeb((v),(a))
+#define iowrite16(v,a)		__raw_writew((v),(a))
 #define iowrite16be(v,a)	__raw_writew(cpu_to_be16((v)),(a))
-#define iowrite32(v,a)		writel((v),(a))
+#define iowrite32(v,a)		__raw_writel((v),(a))
 #define iowrite32be(v,a)	__raw_writel(cpu_to_be32((v)),(a))
 
-#define ioread8_rep(a, d, c)	readsb((a), (d), (c))
-#define ioread16_rep(a, d, c)	readsw((a), (d), (c))
-#define ioread32_rep(a, d, c)	readsl((a), (d), (c))
+#define ioread8_rep(a, d, c)	__raw_readsb((a), (d), (c))
+#define ioread16_rep(a, d, c)	__raw_readsw((a), (d), (c))
+#define ioread32_rep(a, d, c)	__raw_readsl((a), (d), (c))
 
-#define iowrite8_rep(a, s, c)	writesb((a), (s), (c))
-#define iowrite16_rep(a, s, c)	writesw((a), (s), (c))
-#define iowrite32_rep(a, s, c)	writesl((a), (s), (c))
+#define iowrite8_rep(a, s, c)	__raw_writesb((a), (s), (c))
+#define iowrite16_rep(a, s, c)	__raw_writesw((a), (s), (c))
+#define iowrite32_rep(a, s, c)	__raw_writesl((a), (s), (c))
 
-#define mmiowb()	wmb()	/* synco on SH-4A, otherwise a nop */
+/* synco on SH-4A, otherwise a nop */
+#define mmiowb()		wmb()
 
 #define IO_SPACE_LIMIT 0xffffffff
 
+extern unsigned long generic_io_base;
+
 /*
- * This function provides a method for the generic case where a board-specific
- * ioport_map simply needs to return the port + some arbitrary port base.
+ * This function provides a method for the generic case where a
+ * board-specific ioport_map simply needs to return the port + some
+ * arbitrary port base.
  *
  * We use this at board setup time to implicitly set the port base, and
  * as a result, we can use the generic ioport_map.
  */
 static inline void __set_io_port_base(unsigned long pbase)
 {
-	extern unsigned long generic_io_base;
-
 	generic_io_base = pbase;
 }
 
 #define __ioport_map(p, n) sh_mv.mv_ioport_map((p), (n))
 
 /* We really want to try and get these to memcpy etc */
-extern void memcpy_fromio(void *, volatile void __iomem *, unsigned long);
-extern void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
-extern void memset_io(volatile void __iomem *, int, unsigned long);
-
-/* SuperH on-chip I/O functions */
-static inline unsigned char ctrl_inb(unsigned long addr)
-{
-	return *(volatile unsigned char*)addr;
-}
-
-static inline unsigned short ctrl_inw(unsigned long addr)
-{
-	return *(volatile unsigned short*)addr;
-}
-
-static inline unsigned int ctrl_inl(unsigned long addr)
-{
-	return *(volatile unsigned long*)addr;
-}
-
-static inline unsigned long long ctrl_inq(unsigned long addr)
-{
-	return *(volatile unsigned long long*)addr;
-}
-
-static inline void ctrl_outb(unsigned char b, unsigned long addr)
-{
-	*(volatile unsigned char*)addr = b;
-}
-
-static inline void ctrl_outw(unsigned short b, unsigned long addr)
-{
-	*(volatile unsigned short*)addr = b;
-}
-
-static inline void ctrl_outl(unsigned int b, unsigned long addr)
-{
-        *(volatile unsigned long*)addr = b;
-}
-
-static inline void ctrl_outq(unsigned long long b, unsigned long addr)
-{
-	*(volatile unsigned long long*)addr = b;
-}
-
-static inline void ctrl_delay(void)
-{
-#ifdef P2SEG
-	ctrl_inw(P2SEG);
-#endif
-}
+void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
+void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
+void memset_io(volatile void __iomem *, int, unsigned long);
 
 /* Quad-word real-mode I/O, don't ask.. */
 unsigned long long peek_real_address_q(unsigned long long addr);
@@ -347,9 +273,15 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 	__ioremap_mode((offset), (size), _PAGE_CACHABLE)
 #define p3_ioremap(offset, size, flags)			\
 	__ioremap((offset), (size), (flags))
+#define ioremap_prot(offset, size, flags)		\
+	__ioremap_mode((offset), (size), (flags))
 #define iounmap(addr)					\
 	__iounmap((addr))
 
+#define maybebadio(port) \
+	printk(KERN_ERR "bad PC-like io %s:%u for port 0x%lx at 0x%08x\n", \
+	       __func__, __LINE__, (port), (u32)__builtin_return_address(0))
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/sh/include/asm/io_generic.h b/arch/sh/include/asm/io_generic.h
index 92fc6070d7b..1e5d375f55d 100644
--- a/arch/sh/include/asm/io_generic.h
+++ b/arch/sh/include/asm/io_generic.h
@@ -33,13 +33,6 @@ void IO_CONCAT(__IO_PREFIX,outsb)(unsigned long, const void *src, unsigned long
 void IO_CONCAT(__IO_PREFIX,outsw)(unsigned long, const void *src, unsigned long count);
 void IO_CONCAT(__IO_PREFIX,outsl)(unsigned long, const void *src, unsigned long count);
 
-u8 IO_CONCAT(__IO_PREFIX,readb)(void __iomem *);
-u16 IO_CONCAT(__IO_PREFIX,readw)(void __iomem *);
-u32 IO_CONCAT(__IO_PREFIX,readl)(void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writeb)(u8, void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writew)(u16, void __iomem *);
-void IO_CONCAT(__IO_PREFIX,writel)(u32, void __iomem *);
-
 void *IO_CONCAT(__IO_PREFIX,ioremap)(unsigned long offset, unsigned long size);
 void IO_CONCAT(__IO_PREFIX,iounmap)(void *addr);
 
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 6195a531c1b..d319baaf4fb 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -41,6 +41,9 @@ static inline int generic_irq_demux(int irq)
 #define irq_canonicalize(irq)	(irq)
 #define irq_demux(irq)		sh_mv.mv_irq_demux(irq)
 
+void init_IRQ(void);
+asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs);
+
 #ifdef CONFIG_IRQSTACKS
 extern void irq_ctx_init(int cpu);
 extern void irq_ctx_exit(int cpu);
diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
new file mode 100644
index 00000000000..6078d8e551d
--- /dev/null
+++ b/arch/sh/include/asm/kprobes.h
@@ -0,0 +1,58 @@
+#ifndef __ASM_SH_KPROBES_H
+#define __ASM_SH_KPROBES_H
+
+#ifdef CONFIG_KPROBES
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0xc33a
+
+#define MAX_INSN_SIZE 16
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define regs_return_value(regs)		((regs)->regs[0])
+#define flush_insn_slot(p)		do { } while (0)
+#define kretprobe_blacklist_size	0
+
+struct kprobe;
+
+void arch_remove_kprobe(struct kprobe *);
+void kretprobe_trampoline(void);
+void jprobe_return_end(void);
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long jprobe_saved_r15;
+	struct pt_regs jprobe_saved_regs;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+	struct prev_kprobe prev_kprobe;
+};
+
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+extern int kprobe_handle_illslot(unsigned long pc);
+#else
+
+#define kprobe_handle_illslot(pc)	(-1)
+
+#endif /* CONFIG_KPROBES */
+#endif /* __ASM_SH_KPROBES_H */
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index b2e4124070a..f1bae02ef7b 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -42,13 +42,6 @@ struct sh_machine_vector {
 	void (*mv_outsw)(unsigned long, const void *src, unsigned long count);
 	void (*mv_outsl)(unsigned long, const void *src, unsigned long count);
 
-	u8 (*mv_readb)(void __iomem *);
-	u16 (*mv_readw)(void __iomem *);
-	u32 (*mv_readl)(void __iomem *);
-	void (*mv_writeb)(u8, void __iomem *);
-	void (*mv_writew)(u16, void __iomem *);
-	void (*mv_writel)(u32, void __iomem *);
-
 	int (*mv_irq_demux)(int irq);
 
 	void (*mv_init_irq)(void);
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 2969253c404..7f5363b29ba 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -4,6 +4,8 @@
 #ifdef __KERNEL__
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)		(node_data[nid])
 
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 77fb8bf02e4..5871d78e47e 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -104,6 +104,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 
 typedef struct page *pgtable_t;
 
+#define pte_pgprot(x) __pgprot(pte_val(x) & PTE_FLAGS_MASK)
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index a4a8f8b9346..52220d70a09 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -76,6 +76,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #endif
 
 #define PTE_PHYS_MASK		(PHYS_ADDR_MASK & PAGE_MASK)
+#define PTE_FLAGS_MASK		(~(PTE_PHYS_MASK) << PAGE_SHIFT)
 
 #ifdef CONFIG_SUPERH32
 #define VMALLOC_START	(P3SEG)
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 15d9f92ca38..693364a20ad 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -3,6 +3,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/segment.h>
+#include <asm/cache.h>
 
 #ifndef __ASSEMBLY__
 /*
@@ -43,11 +44,52 @@ enum cpu_type {
 	CPU_SH_NONE
 };
 
+/*
+ * TLB information structure
+ *
+ * Defined for both I and D tlb, per-processor.
+ */
+struct tlb_info {
+	unsigned long long next;
+	unsigned long long first;
+	unsigned long long last;
+
+	unsigned int entries;
+	unsigned int step;
+
+	unsigned long flags;
+};
+
+struct sh_cpuinfo {
+	unsigned int type;
+	int cut_major, cut_minor;
+	unsigned long loops_per_jiffy;
+	unsigned long asid_cache;
+
+	struct cache_info icache;	/* Primary I-cache */
+	struct cache_info dcache;	/* Primary D-cache */
+	struct cache_info scache;	/* Secondary cache */
+
+	/* TLB info */
+	struct tlb_info itlb;
+	struct tlb_info dtlb;
+
+	unsigned long flags;
+} __attribute__ ((aligned(L1_CACHE_BYTES)));
+
+extern struct sh_cpuinfo cpu_data[];
+#define boot_cpu_data cpu_data[0]
+#define current_cpu_data cpu_data[smp_processor_id()]
+#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
+
 /* Forward decl */
-struct sh_cpuinfo;
+struct seq_operations;
+
+extern struct pt_regs fake_swapper_regs;
 
 /* arch/sh/kernel/setup.c */
 const char *get_cpu_subtype(struct sh_cpuinfo *c);
+extern const struct seq_operations cpuinfo_op;
 
 #ifdef CONFIG_VSYSCALL
 int vsyscall_init(void);
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 0dadd75bd93..a46a0207e97 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -10,9 +10,9 @@
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
+#include <linux/linkage.h>
 #include <asm/page.h>
 #include <asm/types.h>
-#include <asm/cache.h>
 #include <asm/ptrace.h>
 
 /*
@@ -26,23 +26,7 @@
 #define CCN_CVR		0xff000040
 #define CCN_PRR		0xff000044
 
-struct sh_cpuinfo {
-	unsigned int type;
-	int cut_major, cut_minor;
-	unsigned long loops_per_jiffy;
-	unsigned long asid_cache;
-
-	struct cache_info icache;	/* Primary I-cache */
-	struct cache_info dcache;	/* Primary D-cache */
-	struct cache_info scache;	/* Secondary cache */
-
-	unsigned long flags;
-} __attribute__ ((aligned(L1_CACHE_BYTES)));
-
-extern struct sh_cpuinfo cpu_data[];
-#define boot_cpu_data cpu_data[0]
-#define current_cpu_data cpu_data[smp_processor_id()]
-#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
+asmlinkage void __init sh_cpu_init(void);
 
 /*
  * User space process size: 2GB.
@@ -196,6 +180,8 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->regs[15])
 
+#define user_stack_pointer(regs)	((regs)->regs[15])
+
 #define cpu_sleep()	__asm__ __volatile__ ("sleep" : : : "memory")
 #define cpu_relax()	barrier()
 
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index 770d5169983..b0b4824dfc4 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -17,7 +17,6 @@
 #include <linux/compiler.h>
 #include <asm/page.h>
 #include <asm/types.h>
-#include <asm/cache.h>
 #include <asm/ptrace.h>
 #include <cpu/registers.h>
 
@@ -36,46 +35,6 @@ __asm__("gettr	tr0, %1\n\t" \
 	: "1" (__dummy)); \
 pc; })
 
-/*
- * TLB information structure
- *
- * Defined for both I and D tlb, per-processor.
- */
-struct tlb_info {
-	unsigned long long next;
-	unsigned long long first;
-	unsigned long long last;
-
-	unsigned int entries;
-	unsigned int step;
-
-	unsigned long flags;
-};
-
-struct sh_cpuinfo {
-	enum cpu_type type;
-	unsigned long loops_per_jiffy;
-	unsigned long asid_cache;
-
-	unsigned int cpu_clock, master_clock, bus_clock, module_clock;
-
-	/* Cache info */
-	struct cache_info icache;
-	struct cache_info dcache;
-	struct cache_info scache;
-
-	/* TLB info */
-	struct tlb_info itlb;
-	struct tlb_info dtlb;
-
-	unsigned long flags;
-};
-
-extern struct sh_cpuinfo cpu_data[];
-#define boot_cpu_data cpu_data[0]
-#define current_cpu_data cpu_data[smp_processor_id()]
-#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
-
 #endif
 
 /*
@@ -169,8 +128,6 @@ struct thread_struct {
 #define INIT_MMAP \
 { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
 
-extern  struct pt_regs fake_swapper_regs;
-
 #define INIT_THREAD  {				\
 	.sp		= sizeof(init_stack) +	\
 			  (long) &init_stack,	\
@@ -269,6 +226,8 @@ extern unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  ((tsk)->thread.pc)
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
 
+#define user_stack_pointer(regs)	((regs)->sp)
+
 #define cpu_relax()	barrier()
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index b86aeabba61..3ad18e91bca 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -87,12 +87,18 @@ struct pt_dspregs {
 	unsigned long	mod;
 };
 
+#define PTRACE_GETREGS		12	/* General registers */
+#define PTRACE_SETREGS		13
+
+#define PTRACE_GETFPREGS	14	/* FPU registers */
+#define PTRACE_SETFPREGS	15
+
 #define PTRACE_GETFDPIC		31	/* get the ELF fdpic loadmap address */
 
 #define PTRACE_GETFDPIC_EXEC	0	/* [addr] request the executable loadmap */
 #define PTRACE_GETFDPIC_INTERP	1	/* [addr] request the interpreter loadmap */
 
-#define	PTRACE_GETDSPREGS	55
+#define	PTRACE_GETDSPREGS	55	/* DSP registers */
 #define	PTRACE_SETDSPREGS	56
 #endif
 
@@ -117,6 +123,9 @@ extern void user_disable_single_step(struct task_struct *);
 #define task_pt_regs(task) \
 	((struct pt_regs *) (task_stack_page(task) + THREAD_SIZE \
 		 - sizeof(struct pt_dspregs) - sizeof(unsigned long)) - 1)
+#define task_pt_dspregs(task) \
+	((struct pt_dspregs *) (task_stack_page(task) + THREAD_SIZE \
+		 - sizeof(unsigned long)) - 1)
 #else
 #define task_pt_regs(task) \
 	((struct pt_regs *) (task_stack_page(task) + THREAD_SIZE \
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index 1813f4202a2..f7b010d48af 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_RTC_H
 #define _ASM_RTC_H
 
+void time_init(void);
 extern void (*board_time_init)(void);
 extern void (*rtc_sh_get_time)(struct timespec *);
 extern int (*rtc_sh_set_time)(const time_t);
diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 55a2bd328d9..d450bcf59ee 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -4,7 +4,6 @@
 #define COMMAND_LINE_SIZE 256
 
 #ifdef __KERNEL__
-
 /*
  * This is set up by the setup-routine at boot-time
  */
diff --git a/arch/sh/include/asm/sizes.h b/arch/sh/include/asm/sizes.h
new file mode 100644
index 00000000000..3a1fb97770f
--- /dev/null
+++ b/arch/sh/include/asm/sizes.h
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/* DO NOT EDIT!! - this file automatically generated
+ *                 from .s file by awk -f s2h.awk
+ */
+/*  Size definitions
+ *  Copyright (C) ARM Limited 1998. All rights reserved.
+ */
+
+#ifndef __sizes_h
+#define __sizes_h                       1
+
+/* handy sizes */
+#define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
+#define SZ_256				0x00000100
+#define SZ_512				0x00000200
+
+#define SZ_1K                           0x00000400
+#define SZ_4K                           0x00001000
+#define SZ_8K                           0x00002000
+#define SZ_16K                          0x00004000
+#define SZ_32K				0x00008000
+#define SZ_64K                          0x00010000
+#define SZ_128K                         0x00020000
+#define SZ_256K                         0x00040000
+#define SZ_512K                         0x00080000
+
+#define SZ_1M                           0x00100000
+#define SZ_2M                           0x00200000
+#define SZ_4M                           0x00400000
+#define SZ_8M                           0x00800000
+#define SZ_16M                          0x01000000
+#define SZ_26M				0x01a00000
+#define SZ_32M                          0x02000000
+#define SZ_64M                          0x04000000
+#define SZ_128M                         0x08000000
+#define SZ_256M                         0x10000000
+#define SZ_512M                         0x20000000
+
+#define SZ_1G                           0x40000000
+#define SZ_2G                           0x80000000
+
+#endif
+
+/*         END */
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 593343cd26e..85b660c17eb 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -21,25 +21,29 @@ extern int __cpu_number_map[NR_CPUS];
 extern int __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)  __cpu_logical_map[cpu]
 
-/* I've no idea what the real meaning of this is */
-#define PROC_CHANGE_PENALTY	20
+enum {
+	SMP_MSG_FUNCTION,
+	SMP_MSG_RESCHEDULE,
+	SMP_MSG_FUNCTION_SINGLE,
+	SMP_MSG_TIMER,
 
-#define NO_PROC_ID	(-1)
+	SMP_MSG_NR,	/* must be last */
+};
 
-#define SMP_MSG_FUNCTION	0
-#define SMP_MSG_RESCHEDULE	1
-#define SMP_MSG_FUNCTION_SINGLE	2
-#define SMP_MSG_NR		3
+void smp_message_recv(unsigned int msg);
+void smp_timer_broadcast(cpumask_t mask);
+
+void local_timer_interrupt(void);
+void local_timer_setup(unsigned int cpu);
 
 void plat_smp_setup(void);
 void plat_prepare_cpus(unsigned int max_cpus);
 int plat_smp_processor_id(void);
 void plat_start_cpu(unsigned int cpu, unsigned long entry_point);
 void plat_send_ipi(unsigned int cpu, unsigned int message);
-int plat_register_ipi_handler(unsigned int message,
-			      void (*handler)(void *), void *arg);
-extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+
+void arch_send_call_function_single_ipi(int cpu);
+void arch_send_call_function_ipi(cpumask_t mask);
 
 #else
 
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
new file mode 100644
index 00000000000..6a381429ee9
--- /dev/null
+++ b/arch/sh/include/asm/syscall.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_SH_SYSCALL_H
+#define __ASM_SH_SYSCALL_H
+
+#ifdef CONFIG_SUPERH32
+# include "syscall_32.h"
+#else
+# include "syscall_64.h"
+#endif
+
+#endif /* __ASM_SH_SYSCALL_H */
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
new file mode 100644
index 00000000000..54773f26cd4
--- /dev/null
+++ b/arch/sh/include/asm/syscall_32.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_SH_SYSCALL_32_H
+#define __ASM_SH_SYSCALL_32_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/* The system call number is given by the user in R3; -1 if tra is negative */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return (regs->tra >= 0) ? regs->regs[3] : -1L;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/*
+	 * XXX: This needs some thought. On SH we don't
+	 * save away the original r0 value anywhere.
+	 */
+}
+
+static inline bool syscall_has_error(struct pt_regs *regs)
+{
+	return (regs->sr & 0x1) ? true : false;
+}
+static inline void syscall_set_error(struct pt_regs *regs)
+{
+	regs->sr |= 0x1;
+}
+static inline void syscall_clear_error(struct pt_regs *regs)
+{
+	regs->sr &= ~0x1;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return syscall_has_error(regs) ? regs->regs[0] : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error) {
+		syscall_set_error(regs);
+		regs->regs[0] = -error;
+	} else {
+		syscall_clear_error(regs);
+		regs->regs[0] = val;
+	}
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	/*
+	 * Copy the first n syscall arguments from regs into args[].
+	 * Only a starting index of 0 is handled; fetching from arbitrary
+	 * indices would need extra logic. Presently there are no in-tree
+	 * users that depend on this behaviour.
+	 */
+	BUG_ON(i);
+
+	/* Argument pattern is: R4, R5, R6, R7, R0, R1; n must be 1..6 */
+	switch (n) {
+	case 6: args[5] = regs->regs[1];	/* fall through */
+	case 5: args[4] = regs->regs[0];	/* fall through */
+	case 4: args[3] = regs->regs[7];	/* fall through */
+	case 3: args[2] = regs->regs[6];	/* fall through */
+	case 2: args[1] = regs->regs[5];	/* fall through */
+	case 1:	args[0] = regs->regs[4];
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	/* Only a starting index of 0 is handled; n must be 1..6 */
+	BUG_ON(i);
+
+	switch (n) {
+	case 6: regs->regs[1] = args[5];	/* fall through */
+	case 5: regs->regs[0] = args[4];	/* fall through */
+	case 4: regs->regs[7] = args[3];	/* fall through */
+	case 3: regs->regs[6] = args[2];	/* fall through */
+	case 2: regs->regs[5] = args[1];	/* fall through */
+	case 1: regs->regs[4] = args[0];
+		break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* __ASM_SH_SYSCALL_32_H */
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
new file mode 100644
index 00000000000..bcaaa8ca4d7
--- /dev/null
+++ b/arch/sh/include/asm/syscall_64.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_SH_SYSCALL_64_H
+#define __ASM_SH_SYSCALL_64_H
+
+#include <asm-generic/syscall.h>
+
+#endif /* __ASM_SH_SYSCALL_64_H */
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
new file mode 100644
index 00000000000..c1e2b8deb83
--- /dev/null
+++ b/arch/sh/include/asm/syscalls.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_SH_SYSCALLS_H
+#define __ASM_SH_SYSCALLS_H
+
+#ifdef __KERNEL__
+
+struct old_utsname;
+
+asmlinkage int old_mmap(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			int fd, unsigned long off);
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff);
+asmlinkage int sys_ipc(uint call, int first, int second,
+		       int third, void __user *ptr, long fifth);
+asmlinkage int sys_uname(struct old_utsname __user *name);
+
+#ifdef CONFIG_SUPERH32
+# include "syscalls_32.h"
+#else
+# include "syscalls_64.h"
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_H */
diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h
new file mode 100644
index 00000000000..104c5e68610
--- /dev/null
+++ b/arch/sh/include/asm/syscalls_32.h
@@ -0,0 +1,56 @@
+#ifndef __ASM_SH_SYSCALLS_32_H
+#define __ASM_SH_SYSCALLS_32_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs __regs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidptr,
+			 unsigned long child_tidptr,
+			 struct pt_regs __regs);
+asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs __regs);
+asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv,
+			  char __user * __user *uenvp, unsigned long r7,
+			  struct pt_regs __regs);
+asmlinkage int sys_sigsuspend(old_sigset_t mask, unsigned long r5,
+			      unsigned long r6, unsigned long r7,
+			      struct pt_regs __regs);
+asmlinkage int sys_sigaction(int sig, const struct old_sigaction __user *act,
+			     struct old_sigaction __user *oact);
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+			       unsigned long r6, unsigned long r7,
+			       struct pt_regs __regs);
+asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5,
+			     unsigned long r6, unsigned long r7,
+			     struct pt_regs __regs);
+asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs __regs);
+asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf,
+				     size_t count, long dummy, loff_t pos);
+asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf,
+				      size_t count, long dummy, loff_t pos);
+asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
+					u32 len0, u32 len1, int advice);
+
+/* Misc syscall related bits */
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
+				 unsigned long thread_info_flags);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_32_H */
diff --git a/arch/sh/include/asm/syscalls_64.h b/arch/sh/include/asm/syscalls_64.h
new file mode 100644
index 00000000000..751fd881136
--- /dev/null
+++ b/arch/sh/include/asm/syscalls_64.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_SH_SYSCALLS_64_H
+#define __ASM_SH_SYSCALLS_64_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+asmlinkage int sys_fork(unsigned long r2, unsigned long r3,
+			unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs *pregs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs);
+asmlinkage int sys_vfork(unsigned long r2, unsigned long r3,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs);
+asmlinkage int sys_execve(char *ufilename, char **uargv,
+			  char **uenvp, unsigned long r5,
+			  unsigned long r6, unsigned long r7,
+			  struct pt_regs *pregs);
+
+/* Misc syscall related bits */
+asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_SYSCALLS_64_H */
diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h
index 056d68cd210..6160fe44516 100644
--- a/arch/sh/include/asm/system.h
+++ b/arch/sh/include/asm/system.h
@@ -70,6 +70,8 @@
 
 #ifdef CONFIG_GUSA_RB
 #include <asm/cmpxchg-grb.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/cmpxchg-llsc.h>
 #else
 #include <asm/cmpxchg-irq.h>
 #endif
@@ -125,6 +127,8 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
   })
 
 extern void die(const char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
+void free_initmem(void);
+void free_initrd_mem(unsigned long start, unsigned long end);
 
 extern void *set_exception_table_vec(unsigned int vec, void *handler);
 
@@ -177,8 +181,8 @@ BUILD_TRAP_HANDLER(fpu_state_restore);
 #define arch_align_stack(x) (x)
 
 struct mem_access {
-	unsigned long (*from)(void *dst, const void *src, unsigned long cnt);
-	unsigned long (*to)(void *dst, const void *src, unsigned long cnt);
+	unsigned long (*from)(void *dst, const void __user *src, unsigned long cnt);
+	unsigned long (*to)(void __user *dst, const void *src, unsigned long cnt);
 };
 
 #ifdef CONFIG_SUPERH32
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index f11bcf0855e..a726d5d0727 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -58,7 +58,8 @@ do {									\
 	last = __last;							\
 } while (0)
 
-#define __uses_jump_to_uncached __attribute__ ((__section__ (".uncached.text")))
+#define __uses_jump_to_uncached \
+	noinline __attribute__ ((__section__ (".uncached.text")))
 
 /*
  * Jump to uncached area.
@@ -96,7 +97,48 @@ do {							\
 		: "=&r" (__dummy));			\
 } while (0)
 
+#ifdef CONFIG_CPU_HAS_SR_RB
+#define lookup_exception_vector()	\
+({					\
+	unsigned long _vec;		\
+					\
+	__asm__ __volatile__ (		\
+		"stc r2_bank, %0\n\t"	\
+		: "=r" (_vec)		\
+	);				\
+					\
+	_vec;				\
+})
+#else
+#define lookup_exception_vector()	\
+({					\
+	unsigned long _vec;		\
+	__asm__ __volatile__ (		\
+		"mov r4, %0\n\t"	\
+		: "=r" (_vec)		\
+	);				\
+					\
+	_vec;				\
+})
+#endif
+
 int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
+asmlinkage void do_address_error(struct pt_regs *regs,
+				 unsigned long writeaccess,
+				 unsigned long address);
+asmlinkage void do_divide_error(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs __regs);
+asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
+				   unsigned long r6, unsigned long r7,
+				   struct pt_regs __regs);
+
 #endif /* __ASM_SH_SYSTEM_32_H */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 0a894cafb1d..f09ac480629 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -33,20 +33,12 @@ struct thread_info {
 #define PREEMPT_ACTIVE		0x10000000
 
 #if defined(CONFIG_4KSTACKS)
-#define THREAD_SIZE_ORDER	(0)
-#elif defined(CONFIG_PAGE_SIZE_4KB)
-#define THREAD_SIZE_ORDER	(1)
-#elif defined(CONFIG_PAGE_SIZE_8KB)
-#define THREAD_SIZE_ORDER	(1)
-#elif defined(CONFIG_PAGE_SIZE_16KB)
-#define THREAD_SIZE_ORDER	(0)
-#elif defined(CONFIG_PAGE_SIZE_64KB)
-#define THREAD_SIZE_ORDER	(0)
+#define THREAD_SHIFT	12
 #else
-#error "Unknown thread size"
+#define THREAD_SHIFT	13
 #endif
 
-#define THREAD_SIZE	(PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SIZE	(1 << THREAD_SHIFT)
 #define STACK_WARN	(THREAD_SIZE >> 3)
 
 /*
@@ -94,15 +86,19 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+/* thread information allocation */
+#if THREAD_SHIFT >= PAGE_SHIFT
+
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+
+#else /* THREAD_SHIFT < PAGE_SHIFT */
+
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(ti)	kzalloc(THREAD_SIZE, GFP_KERNEL)
-#else
-#define alloc_thread_info(ti)	kmalloc(THREAD_SIZE, GFP_KERNEL)
-#endif
-#define free_thread_info(ti)	kfree(ti)
+extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
+extern void free_thread_info(struct thread_info *ti);
+ 
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h
index 5580fd47100..56fd20b8cdc 100644
--- a/arch/sh/include/asm/uaccess_64.h
+++ b/arch/sh/include/asm/uaccess_64.h
@@ -26,16 +26,20 @@ do {								\
 	retval = 0;						\
 	switch (size) {						\
 	case 1:							\
-		retval = __get_user_asm_b(x, ptr);		\
+		retval = __get_user_asm_b((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 2:							\
-		retval = __get_user_asm_w(x, ptr);		\
+		retval = __get_user_asm_w((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 4:							\
-		retval = __get_user_asm_l(x, ptr);		\
+		retval = __get_user_asm_l((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 8:							\
-		retval = __get_user_asm_q(x, ptr);		\
+		retval = __get_user_asm_q((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	default:						\
 		__get_user_unknown();				\
@@ -54,16 +58,20 @@ do {								\
 	retval = 0;						\
 	switch (size) {						\
 	case 1:							\
-		retval = __put_user_asm_b(x, ptr);		\
+		retval = __put_user_asm_b((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 2:							\
-		retval = __put_user_asm_w(x, ptr);		\
+		retval = __put_user_asm_w((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 4:							\
-		retval = __put_user_asm_l(x, ptr);		\
+		retval = __put_user_asm_l((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	case 8:							\
-		retval = __put_user_asm_q(x, ptr);		\
+		retval = __put_user_asm_q((void *)&x,		\
+					  (long)ptr);		\
 		break;						\
 	default:						\
 		__put_user_unknown();				\
@@ -77,5 +85,7 @@ extern long __put_user_asm_q(void *, long);
 extern void __put_user_unknown(void);
 
 extern long __strnlen_user(const char *__s, long __n);
+extern int __strncpy_from_user(unsigned long __dest,
+	       unsigned long __user __src, int __count);
 
 #endif /* __ASM_SH_UACCESS_64_H */
diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7203.h b/arch/sh/include/cpu-sh2a/cpu/sh7203.h
new file mode 100644
index 00000000000..79f93159018
--- /dev/null
+++ b/arch/sh/include/cpu-sh2a/cpu/sh7203.h
@@ -0,0 +1,143 @@
+#ifndef __ASM_SH7203_H__
+#define __ASM_SH7203_H__
+
+enum {
+	/* PA */
+	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
+	GPIO_PA3, GPIO_PA2, GPIO_PA1, GPIO_PA0,
+
+	/* PB */
+	GPIO_PB12,
+	GPIO_PB11, GPIO_PB10, GPIO_PB9, GPIO_PB8,
+	GPIO_PB7, GPIO_PB6, GPIO_PB5, GPIO_PB4,
+	GPIO_PB3, GPIO_PB2, GPIO_PB1, GPIO_PB0,
+
+	/* PC */
+	GPIO_PC14, GPIO_PC13, GPIO_PC12,
+	GPIO_PC11, GPIO_PC10, GPIO_PC9, GPIO_PC8,
+	GPIO_PC7, GPIO_PC6, GPIO_PC5, GPIO_PC4,
+	GPIO_PC3, GPIO_PC2, GPIO_PC1, GPIO_PC0,
+
+	/* PD */
+	GPIO_PD15, GPIO_PD14, GPIO_PD13, GPIO_PD12,
+	GPIO_PD11, GPIO_PD10, GPIO_PD9, GPIO_PD8,
+	GPIO_PD7, GPIO_PD6, GPIO_PD5, GPIO_PD4,
+	GPIO_PD3, GPIO_PD2, GPIO_PD1, GPIO_PD0,
+
+	/* PE */
+	GPIO_PE15, GPIO_PE14, GPIO_PE13, GPIO_PE12,
+	GPIO_PE11, GPIO_PE10, GPIO_PE9, GPIO_PE8,
+	GPIO_PE7, GPIO_PE6, GPIO_PE5, GPIO_PE4,
+	GPIO_PE3, GPIO_PE2, GPIO_PE1, GPIO_PE0,
+
+	/* PF */
+	GPIO_PF30, GPIO_PF29, GPIO_PF28,
+	GPIO_PF27, GPIO_PF26, GPIO_PF25, GPIO_PF24,
+	GPIO_PF23, GPIO_PF22, GPIO_PF21, GPIO_PF20,
+	GPIO_PF19, GPIO_PF18, GPIO_PF17, GPIO_PF16,
+	GPIO_PF15, GPIO_PF14, GPIO_PF13, GPIO_PF12,
+	GPIO_PF11, GPIO_PF10, GPIO_PF9, GPIO_PF8,
+	GPIO_PF7, GPIO_PF6, GPIO_PF5, GPIO_PF4,
+	GPIO_PF3, GPIO_PF2, GPIO_PF1, GPIO_PF0,
+
+	/* INTC: IRQ and PINT on PB/PD/PE */
+	GPIO_FN_PINT7_PB, GPIO_FN_PINT6_PB, GPIO_FN_PINT5_PB, GPIO_FN_PINT4_PB,
+	GPIO_FN_PINT3_PB, GPIO_FN_PINT2_PB, GPIO_FN_PINT1_PB, GPIO_FN_PINT0_PB,
+	GPIO_FN_PINT7_PD, GPIO_FN_PINT6_PD, GPIO_FN_PINT5_PD, GPIO_FN_PINT4_PD,
+	GPIO_FN_PINT3_PD, GPIO_FN_PINT2_PD, GPIO_FN_PINT1_PD, GPIO_FN_PINT0_PD,
+	GPIO_FN_IRQ7_PB, GPIO_FN_IRQ6_PB, GPIO_FN_IRQ5_PB, GPIO_FN_IRQ4_PB,
+	GPIO_FN_IRQ3_PB, GPIO_FN_IRQ2_PB, GPIO_FN_IRQ1_PB, GPIO_FN_IRQ0_PB,
+	GPIO_FN_IRQ7_PD, GPIO_FN_IRQ6_PD, GPIO_FN_IRQ5_PD, GPIO_FN_IRQ4_PD,
+	GPIO_FN_IRQ3_PD, GPIO_FN_IRQ2_PD, GPIO_FN_IRQ1_PD, GPIO_FN_IRQ0_PD,
+	GPIO_FN_IRQ7_PE, GPIO_FN_IRQ6_PE, GPIO_FN_IRQ5_PE, GPIO_FN_IRQ4_PE,
+	GPIO_FN_IRQ3_PE, GPIO_FN_IRQ2_PE, GPIO_FN_IRQ1_PE, GPIO_FN_IRQ0_PE,
+
+	GPIO_FN_WDTOVF, GPIO_FN_IRQOUT, GPIO_FN_REFOUT, GPIO_FN_IRQOUT_REFOUT,
+	GPIO_FN_UBCTRG,
+
+	/* CAN */
+	GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
+	GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1,
+
+	/* IIC3 */
+	GPIO_FN_SDA3, GPIO_FN_SCL3,
+	GPIO_FN_SDA2, GPIO_FN_SCL2,
+	GPIO_FN_SDA1, GPIO_FN_SCL1,
+	GPIO_FN_SDA0, GPIO_FN_SCL0,
+
+	/* DMAC */
+	GPIO_FN_TEND0_PD, GPIO_FN_TEND0_PE, GPIO_FN_DACK0_PD,
+	GPIO_FN_DACK0_PE, GPIO_FN_DREQ0_PD, GPIO_FN_DREQ0_PE,
+	GPIO_FN_TEND1_PD, GPIO_FN_TEND1_PE, GPIO_FN_DACK1_PD,
+	GPIO_FN_DACK1_PE, GPIO_FN_DREQ1_PD, GPIO_FN_DREQ1_PE,
+	GPIO_FN_DACK2, GPIO_FN_DREQ2,
+	GPIO_FN_DACK3, GPIO_FN_DREQ3,
+
+	/* ADC */
+	GPIO_FN_ADTRG_PD, GPIO_FN_ADTRG_PE,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_A21, GPIO_FN_CS4, GPIO_FN_MRES, GPIO_FN_BS,
+	GPIO_FN_IOIS16, GPIO_FN_CS1, GPIO_FN_CS6_CE1B,
+	GPIO_FN_CE2B, GPIO_FN_CS5_CE1A, GPIO_FN_CE2A,
+	GPIO_FN_FRAME, GPIO_FN_WAIT, GPIO_FN_RDWR,
+	GPIO_FN_CKE, GPIO_FN_CASU, GPIO_FN_BREQ, GPIO_FN_RASU,
+	GPIO_FN_BACK, GPIO_FN_CASL, GPIO_FN_RASL,
+	GPIO_FN_WE3_DQMUU_AH_ICIO_WR, GPIO_FN_WE2_DQMUL_ICIORD,
+	GPIO_FN_WE1_DQMLU_WE, GPIO_FN_WE0_DQMLL,
+	GPIO_FN_CS3, GPIO_FN_CS2, GPIO_FN_A1, GPIO_FN_A0, GPIO_FN_CS7,
+
+	/* TMU */
+	GPIO_FN_TIOC4D, GPIO_FN_TIOC4C, GPIO_FN_TIOC4B, GPIO_FN_TIOC4A,
+	GPIO_FN_TIOC3D, GPIO_FN_TIOC3C, GPIO_FN_TIOC3B, GPIO_FN_TIOC3A,
+	GPIO_FN_TIOC2B, GPIO_FN_TIOC1B, GPIO_FN_TIOC2A, GPIO_FN_TIOC1A,
+	GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A,
+	GPIO_FN_TCLKD_PD, GPIO_FN_TCLKC_PD, GPIO_FN_TCLKB_PD, GPIO_FN_TCLKA_PD,
+	GPIO_FN_TCLKD_PF, GPIO_FN_TCLKC_PF, GPIO_FN_TCLKB_PF, GPIO_FN_TCLKA_PF,
+
+	/* SSU */
+	GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD,
+	GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF,
+	GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD,
+	GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF,
+
+	/* SCIF */
+	GPIO_FN_TXD0, GPIO_FN_RXD0, GPIO_FN_SCK0,
+	GPIO_FN_TXD1, GPIO_FN_RXD1, GPIO_FN_SCK1,
+	GPIO_FN_TXD2, GPIO_FN_RXD2, GPIO_FN_SCK2,
+	GPIO_FN_RTS3, GPIO_FN_CTS3, GPIO_FN_TXD3, GPIO_FN_RXD3, GPIO_FN_SCK3,
+
+	/* SSI */
+	GPIO_FN_AUDIO_CLK,
+	GPIO_FN_SSIDATA3, GPIO_FN_SSIWS3, GPIO_FN_SSISCK3,
+	GPIO_FN_SSIDATA2, GPIO_FN_SSIWS2, GPIO_FN_SSISCK2,
+	GPIO_FN_SSIDATA1, GPIO_FN_SSIWS1, GPIO_FN_SSISCK1,
+	GPIO_FN_SSIDATA0, GPIO_FN_SSIWS0, GPIO_FN_SSISCK0,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_FRB,
+	GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0,
+	GPIO_FN_FSC, GPIO_FN_FOE, GPIO_FN_FCDE, GPIO_FN_FWE,
+
+	/* LCDC */
+	GPIO_FN_LCD_VEPWC, GPIO_FN_LCD_VCPWC,
+	GPIO_FN_LCD_CLK, GPIO_FN_LCD_FLM,
+	GPIO_FN_LCD_M_DISP, GPIO_FN_LCD_CL2,
+	GPIO_FN_LCD_CL1, GPIO_FN_LCD_DON,
+	GPIO_FN_LCD_DATA15, GPIO_FN_LCD_DATA14,
+	GPIO_FN_LCD_DATA13, GPIO_FN_LCD_DATA12,
+	GPIO_FN_LCD_DATA11, GPIO_FN_LCD_DATA10,
+	GPIO_FN_LCD_DATA9, GPIO_FN_LCD_DATA8,
+	GPIO_FN_LCD_DATA7, GPIO_FN_LCD_DATA6,
+	GPIO_FN_LCD_DATA5, GPIO_FN_LCD_DATA4,
+	GPIO_FN_LCD_DATA3, GPIO_FN_LCD_DATA2,
+	GPIO_FN_LCD_DATA1, GPIO_FN_LCD_DATA0,
+};
+
+#endif /* __ASM_SH7203_H__ */
diff --git a/arch/sh/include/cpu-sh3/cpu/sh7720.h b/arch/sh/include/cpu-sh3/cpu/sh7720.h
new file mode 100644
index 00000000000..41c1406d6da
--- /dev/null
+++ b/arch/sh/include/cpu-sh3/cpu/sh7720.h
@@ -0,0 +1,174 @@
+#ifndef __ASM_SH7720_H__
+#define __ASM_SH7720_H__
+
+enum { /* GPIO_PTxn entries first, then GPIO_FN_* entries; values are implicit (positional) */
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE6, GPIO_PTE5, GPIO_PTE4, GPIO_PTE3,
+	GPIO_PTE2, GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF (the second line ends with GPIO_PTG6, which is a PTG entry) */
+	GPIO_PTF6, GPIO_PTF5, GPIO_PTF4, GPIO_PTF3,
+	GPIO_PTF2, GPIO_PTF1, GPIO_PTF0, GPIO_PTG6,
+
+	/* PTG (GPIO_PTG6 is enumerated just above, at the end of the PTF lines) */
+	GPIO_PTG5, GPIO_PTG4, GPIO_PTG3, GPIO_PTG2,
+	GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH6, GPIO_PTH5, GPIO_PTH4, GPIO_PTH3,
+	GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ6, GPIO_PTJ5, GPIO_PTJ4, GPIO_PTJ3,
+	GPIO_PTJ2, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL (only PTL7..PTL3 are listed; there are no PTL2..PTL0 entries) */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4, GPIO_PTL3,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTP */
+	GPIO_PTP4, GPIO_PTP3, GPIO_PTP2, GPIO_PTP1, GPIO_PTP0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS4, GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT4, GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU4, GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV4, GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_IOIS16, GPIO_FN_RAS, GPIO_FN_CAS, GPIO_FN_CKE,
+	GPIO_FN_CS5B_CE1A, GPIO_FN_CS6B_CE1B,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_A21, GPIO_FN_A20, GPIO_FN_A19, GPIO_FN_A0,
+	GPIO_FN_REFOUT, GPIO_FN_IRQOUT,
+
+	/* LCDC */
+	GPIO_FN_LCD_DATA15, GPIO_FN_LCD_DATA14,
+	GPIO_FN_LCD_DATA13, GPIO_FN_LCD_DATA12,
+	GPIO_FN_LCD_DATA11, GPIO_FN_LCD_DATA10,
+	GPIO_FN_LCD_DATA9, GPIO_FN_LCD_DATA8,
+	GPIO_FN_LCD_DATA7, GPIO_FN_LCD_DATA6,
+	GPIO_FN_LCD_DATA5, GPIO_FN_LCD_DATA4,
+	GPIO_FN_LCD_DATA3, GPIO_FN_LCD_DATA2,
+	GPIO_FN_LCD_DATA1, GPIO_FN_LCD_DATA0,
+	GPIO_FN_LCD_M_DISP,
+	GPIO_FN_LCD_CL1, GPIO_FN_LCD_CL2,
+	GPIO_FN_LCD_DON, GPIO_FN_LCD_FLM,
+	GPIO_FN_LCD_VEPWC, GPIO_FN_LCD_VCPWC,
+
+	/* AFEIF */
+	GPIO_FN_AFE_RXIN, GPIO_FN_AFE_RDET,
+	GPIO_FN_AFE_FS, GPIO_FN_AFE_TXOUT,
+	GPIO_FN_AFE_SCLK, GPIO_FN_AFE_RLYCNT,
+	GPIO_FN_AFE_HC1,
+
+	/* IIC */
+	GPIO_FN_IIC_SCL, GPIO_FN_IIC_SDA,
+
+	/* DAC */
+	GPIO_FN_DA1, GPIO_FN_DA0,
+
+	/* ADC */
+	GPIO_FN_AN3, GPIO_FN_AN2, GPIO_FN_AN1, GPIO_FN_AN0, GPIO_FN_ADTRG,
+
+	/* USB */
+	GPIO_FN_USB1D_RCV, GPIO_FN_USB1D_TXSE0,
+	GPIO_FN_USB1D_TXDPLS, GPIO_FN_USB1D_DMNS,
+	GPIO_FN_USB1D_DPLS, GPIO_FN_USB1D_SPEED,
+	GPIO_FN_USB1D_TXENL, GPIO_FN_USB2_PWR_EN,
+	GPIO_FN_USB1_PWR_EN_USBF_UPLUP, GPIO_FN_USB1D_SUSPEND,
+
+	/* INTC */
+	GPIO_FN_IRQ5, GPIO_FN_IRQ4,
+	GPIO_FN_IRQ3_IRL3, GPIO_FN_IRQ2_IRL2,
+	GPIO_FN_IRQ1_IRL1, GPIO_FN_IRQ0_IRL0,
+
+	/* PCC */
+	GPIO_FN_PCC_REG, GPIO_FN_PCC_DRV,
+	GPIO_FN_PCC_BVD2, GPIO_FN_PCC_BVD1,
+	GPIO_FN_PCC_CD2, GPIO_FN_PCC_CD1,
+	GPIO_FN_PCC_RESET, GPIO_FN_PCC_RDY,
+	GPIO_FN_PCC_VS2, GPIO_FN_PCC_VS1,
+
+	/* HUDI */
+	GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+	GPIO_FN_AUDCK, GPIO_FN_AUDSYNC, GPIO_FN_ASEBRKAK, GPIO_FN_TRST,
+	GPIO_FN_TMS, GPIO_FN_TDO, GPIO_FN_TDI, GPIO_FN_TCK,
+
+	/* DMAC */
+	GPIO_FN_DACK1, GPIO_FN_DREQ1, GPIO_FN_DACK0, GPIO_FN_DREQ0,
+	GPIO_FN_TEND1, GPIO_FN_TEND0,
+
+	/* SIOF0 */
+	GPIO_FN_SIOF0_SYNC, GPIO_FN_SIOF0_MCLK,
+	GPIO_FN_SIOF0_TXD, GPIO_FN_SIOF0_RXD,
+	GPIO_FN_SIOF0_SCK,
+
+	/* SIOF1 */
+	GPIO_FN_SIOF1_SYNC, GPIO_FN_SIOF1_MCLK,
+	GPIO_FN_SIOF1_TXD, GPIO_FN_SIOF1_RXD,
+	GPIO_FN_SIOF1_SCK,
+
+	/* SCIF0 */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD,
+	GPIO_FN_SCIF0_RTS, GPIO_FN_SCIF0_CTS, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 */
+	GPIO_FN_SCIF1_TXD, GPIO_FN_SCIF1_RXD,
+	GPIO_FN_SCIF1_RTS, GPIO_FN_SCIF1_CTS, GPIO_FN_SCIF1_SCK,
+
+	/* TPU */
+	GPIO_FN_TPU_TO1, GPIO_FN_TPU_TO0,
+	GPIO_FN_TPU_TI3B, GPIO_FN_TPU_TI3A,
+	GPIO_FN_TPU_TI2B, GPIO_FN_TPU_TI2A,
+	GPIO_FN_TPU_TO3, GPIO_FN_TPU_TO2,
+
+	/* SIM */
+	GPIO_FN_SIM_D, GPIO_FN_SIM_CLK, GPIO_FN_SIM_RST,
+
+	/* MMC */
+	GPIO_FN_MMC_DAT, GPIO_FN_MMC_CMD,
+	GPIO_FN_MMC_CLK, GPIO_FN_MMC_VDDON,
+	GPIO_FN_MMC_ODMOD,
+
+	/* SYSC */
+	GPIO_FN_STATUS0, GPIO_FN_STATUS1,
+};
+
+#endif /* __ASM_SH7720_H__ */
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h
new file mode 100644
index 00000000000..4b3096f5307
--- /dev/null
+++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h
@@ -0,0 +1,210 @@
+#ifndef __ASM_SH7722_H__
+#define __ASM_SH7722_H__
+
+enum { /* GPIO_PTxn entries first, then GPIO_FN_* entries; values are implicit (positional) */
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC (no PTC6 or PTC1 entry) */
+	GPIO_PTC7, GPIO_PTC5, GPIO_PTC4, GPIO_PTC3,
+	GPIO_PTC2, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE (no PTE3 or PTE2 entry) */
+	GPIO_PTE7, GPIO_PTE6, GPIO_PTE5, GPIO_PTE4,
+	GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF6, GPIO_PTF5, GPIO_PTF4, GPIO_PTF3,
+	GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+	GPIO_PTG4, GPIO_PTG3, GPIO_PTG2, GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ (no PTJ4, PTJ3 or PTJ2 entry) */
+	GPIO_PTJ7, GPIO_PTJ6, GPIO_PTJ5, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK6, GPIO_PTK5, GPIO_PTK4, GPIO_PTK3,
+	GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4,
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR4, GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS4, GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT4, GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU4, GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV4, GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW6, GPIO_PTW5, GPIO_PTW4, GPIO_PTW3,
+	GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX6, GPIO_PTX5, GPIO_PTX4, GPIO_PTX3,
+	GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY5, GPIO_PTY4, GPIO_PTY3, GPIO_PTY2,
+	GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ (no PTZ0 entry) */
+	GPIO_PTZ5, GPIO_PTZ4, GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1,
+
+	/* SCIF0 */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD,
+	GPIO_FN_SCIF0_RTS, GPIO_FN_SCIF0_CTS, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 */
+	GPIO_FN_SCIF1_TXD, GPIO_FN_SCIF1_RXD,
+	GPIO_FN_SCIF1_RTS, GPIO_FN_SCIF1_CTS, GPIO_FN_SCIF1_SCK,
+
+	/* SCIF2 */
+	GPIO_FN_SCIF2_TXD, GPIO_FN_SCIF2_RXD,
+	GPIO_FN_SCIF2_RTS, GPIO_FN_SCIF2_CTS, GPIO_FN_SCIF2_SCK,
+
+	/* SIO */
+	GPIO_FN_SIOTXD, GPIO_FN_SIORXD,
+	GPIO_FN_SIOD, GPIO_FN_SIOSTRB0, GPIO_FN_SIOSTRB1,
+	GPIO_FN_SIOSCK, GPIO_FN_SIOMCK,
+
+	/* CEU */
+	GPIO_FN_VIO_D15, GPIO_FN_VIO_D14, GPIO_FN_VIO_D13, GPIO_FN_VIO_D12,
+	GPIO_FN_VIO_D11, GPIO_FN_VIO_D10, GPIO_FN_VIO_D9, GPIO_FN_VIO_D8,
+	GPIO_FN_VIO_D7, GPIO_FN_VIO_D6, GPIO_FN_VIO_D5, GPIO_FN_VIO_D4,
+	GPIO_FN_VIO_D3, GPIO_FN_VIO_D2, GPIO_FN_VIO_D1, GPIO_FN_VIO_D0,
+	GPIO_FN_VIO_FLD, GPIO_FN_VIO_CKO, GPIO_FN_VIO_STEX, GPIO_FN_VIO_STEM,
+	GPIO_FN_VIO_VD, GPIO_FN_VIO_HD, GPIO_FN_VIO_CLK,
+	GPIO_FN_VIO_VD2, GPIO_FN_VIO_HD2, GPIO_FN_VIO_CLK2,
+
+	/* LCDC */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9, GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7, GPIO_FN_LCDD6, GPIO_FN_LCDD5, GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3, GPIO_FN_LCDD2, GPIO_FN_LCDD1, GPIO_FN_LCDD0,
+	GPIO_FN_LCDLCLK,
+	/* Main LCD */
+	GPIO_FN_LCDDON, GPIO_FN_LCDVCPWC, GPIO_FN_LCDVEPWC, GPIO_FN_LCDVSYN,
+	/* Main LCD - RGB Mode */
+	GPIO_FN_LCDDCK, GPIO_FN_LCDHSYN, GPIO_FN_LCDDISP,
+	/* Main LCD - SYS Mode */
+	GPIO_FN_LCDRS, GPIO_FN_LCDCS, GPIO_FN_LCDWR, GPIO_FN_LCDRD,
+	/* Sub LCD - SYS Mode */
+	GPIO_FN_LCDDON2, GPIO_FN_LCDVCPWC2, GPIO_FN_LCDVEPWC2,
+	GPIO_FN_LCDVSYN2, GPIO_FN_LCDCS2,
+
+	/* BSC */
+	GPIO_FN_IOIS16, GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_BS, GPIO_FN_CS6B_CE1B, GPIO_FN_WAIT, GPIO_FN_CS6A_CE2B,
+
+	/* SBSC */
+	GPIO_FN_HPD63, GPIO_FN_HPD62, GPIO_FN_HPD61, GPIO_FN_HPD60,
+	GPIO_FN_HPD59, GPIO_FN_HPD58, GPIO_FN_HPD57, GPIO_FN_HPD56,
+	GPIO_FN_HPD55, GPIO_FN_HPD54, GPIO_FN_HPD53, GPIO_FN_HPD52,
+	GPIO_FN_HPD51, GPIO_FN_HPD50, GPIO_FN_HPD49, GPIO_FN_HPD48,
+	GPIO_FN_HPDQM7, GPIO_FN_HPDQM6, GPIO_FN_HPDQM5, GPIO_FN_HPDQM4,
+
+	/* IRQ */
+	GPIO_FN_IRQ0, GPIO_FN_IRQ1, GPIO_FN_IRQ2, GPIO_FN_IRQ3,
+	GPIO_FN_IRQ4, GPIO_FN_IRQ5, GPIO_FN_IRQ6, GPIO_FN_IRQ7,
+
+	/* SDHI */
+	GPIO_FN_SDHICD, GPIO_FN_SDHIWP, GPIO_FN_SDHID3, GPIO_FN_SDHID2,
+	GPIO_FN_SDHID1, GPIO_FN_SDHID0, GPIO_FN_SDHICMD, GPIO_FN_SDHICLK,
+
+	/* SIU - Port A */
+	GPIO_FN_SIUAOLR, GPIO_FN_SIUAOBT, GPIO_FN_SIUAISLD, GPIO_FN_SIUAILR,
+	GPIO_FN_SIUAIBT, GPIO_FN_SIUAOSLD, GPIO_FN_SIUMCKA, GPIO_FN_SIUFCKA,
+
+	/* SIU - Port B */
+	GPIO_FN_SIUBOLR, GPIO_FN_SIUBOBT, GPIO_FN_SIUBISLD, GPIO_FN_SIUBILR,
+	GPIO_FN_SIUBIBT, GPIO_FN_SIUBOSLD, GPIO_FN_SIUMCKB, GPIO_FN_SIUFCKB,
+
+	/* AUD */
+	GPIO_FN_AUDSYNC, GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1,
+	GPIO_FN_AUDATA0,
+
+	/* DMAC */
+	GPIO_FN_DACK, GPIO_FN_DREQ0,
+
+	/* VOU */
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_HSYNC, GPIO_FN_DV_VSYNC,
+	GPIO_FN_DV_D15, GPIO_FN_DV_D14, GPIO_FN_DV_D13, GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11, GPIO_FN_DV_D10, GPIO_FN_DV_D9, GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7, GPIO_FN_DV_D6, GPIO_FN_DV_D5, GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3, GPIO_FN_DV_D2, GPIO_FN_DV_D1, GPIO_FN_DV_D0,
+
+	/* CPG */
+	GPIO_FN_STATUS0, GPIO_FN_PDSTATUS,
+
+	/* SIOF0 */
+	GPIO_FN_SIOF0_MCK, GPIO_FN_SIOF0_SCK,
+	GPIO_FN_SIOF0_SYNC, GPIO_FN_SIOF0_SS1, GPIO_FN_SIOF0_SS2,
+	GPIO_FN_SIOF0_TXD, GPIO_FN_SIOF0_RXD,
+
+	/* SIOF1 */
+	GPIO_FN_SIOF1_MCK, GPIO_FN_SIOF1_SCK,
+	GPIO_FN_SIOF1_SYNC, GPIO_FN_SIOF1_SS1, GPIO_FN_SIOF1_SS2,
+	GPIO_FN_SIOF1_TXD, GPIO_FN_SIOF1_RXD,
+
+	/* SIM */
+	GPIO_FN_SIM_D, GPIO_FN_SIM_CLK, GPIO_FN_SIM_RST,
+
+	/* TSIF */
+	GPIO_FN_TS_SDAT, GPIO_FN_TS_SCK, GPIO_FN_TS_SDEN, GPIO_FN_TS_SPSYNC,
+
+	/* IRDA */
+	GPIO_FN_IRDA_IN, GPIO_FN_IRDA_OUT,
+
+	/* TPU */
+	GPIO_FN_TPUTO,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0, GPIO_FN_FCDE,
+	GPIO_FN_FOE, GPIO_FN_FSC, GPIO_FN_FWE, GPIO_FN_FRB,
+
+	/* KEYSC */
+	GPIO_FN_KEYIN0, GPIO_FN_KEYIN1, GPIO_FN_KEYIN2, GPIO_FN_KEYIN3,
+	GPIO_FN_KEYIN4, GPIO_FN_KEYOUT0, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT2,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5,
+};
+
+#endif /* __ASM_SH7722_H__ */
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7723.h b/arch/sh/include/cpu-sh4/cpu/sh7723.h
new file mode 100644
index 00000000000..9d2f6d7aa93
--- /dev/null
+++ b/arch/sh/include/cpu-sh4/cpu/sh7723.h
@@ -0,0 +1,254 @@
+#ifndef __ASM_SH7723_H__
+#define __ASM_SH7723_H__
+
+enum { /* GPIO_PTxn entries first, then GPIO_FN_* entries; values are implicit (positional) */
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE5, GPIO_PTE4, GPIO_PTE3, GPIO_PTE2,
+	GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF7, GPIO_PTF6, GPIO_PTF5, GPIO_PTF4,
+	GPIO_PTF3, GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+	GPIO_PTG5, GPIO_PTG4, GPIO_PTG3, GPIO_PTG2,
+	GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ (no PTJ6 or PTJ4 entry) */
+	GPIO_PTJ7, GPIO_PTJ5, GPIO_PTJ3, GPIO_PTJ2,
+	GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK7, GPIO_PTK6, GPIO_PTK5, GPIO_PTK4,
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+	GPIO_PTS7, GPIO_PTS6, GPIO_PTS5, GPIO_PTS4,
+	GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT5, GPIO_PTT4, GPIO_PTT3, GPIO_PTT2,
+	GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU5, GPIO_PTU4, GPIO_PTU3, GPIO_PTU2,
+	GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV7, GPIO_PTV6, GPIO_PTV5, GPIO_PTV4,
+	GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW7, GPIO_PTW6, GPIO_PTW5, GPIO_PTW4,
+	GPIO_PTW3, GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX7, GPIO_PTX6, GPIO_PTX5, GPIO_PTX4,
+	GPIO_PTX3, GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY7, GPIO_PTY6, GPIO_PTY5, GPIO_PTY4,
+	GPIO_PTY3, GPIO_PTY2, GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ */
+	GPIO_PTZ7, GPIO_PTZ6, GPIO_PTZ5, GPIO_PTZ4,
+	GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1, GPIO_PTZ0,
+
+	/* SCIF0 (SCIF: 3 pin PTT/PTU) */
+	GPIO_FN_SCIF0_PTT_TXD, GPIO_FN_SCIF0_PTT_RXD, GPIO_FN_SCIF0_PTT_SCK,
+	GPIO_FN_SCIF0_PTU_TXD, GPIO_FN_SCIF0_PTU_RXD, GPIO_FN_SCIF0_PTU_SCK,
+
+	/* SCIF1 (SCIF: 3 pin PTS/PTV) */
+	GPIO_FN_SCIF1_PTS_TXD, GPIO_FN_SCIF1_PTS_RXD, GPIO_FN_SCIF1_PTS_SCK,
+	GPIO_FN_SCIF1_PTV_TXD, GPIO_FN_SCIF1_PTV_RXD, GPIO_FN_SCIF1_PTV_SCK,
+
+	/* SCIF2 (SCIF: 3 pin PTT/PTU) */
+	GPIO_FN_SCIF2_PTT_TXD, GPIO_FN_SCIF2_PTT_RXD, GPIO_FN_SCIF2_PTT_SCK,
+	GPIO_FN_SCIF2_PTU_TXD, GPIO_FN_SCIF2_PTU_RXD, GPIO_FN_SCIF2_PTU_SCK,
+
+	/* SCIF3 (SCIFA: 5 pin PTS/PTV) */
+	GPIO_FN_SCIF3_PTS_TXD, GPIO_FN_SCIF3_PTS_RXD, GPIO_FN_SCIF3_PTS_SCK,
+	GPIO_FN_SCIF3_PTS_RTS, GPIO_FN_SCIF3_PTS_CTS,
+	GPIO_FN_SCIF3_PTV_TXD, GPIO_FN_SCIF3_PTV_RXD, GPIO_FN_SCIF3_PTV_SCK,
+	GPIO_FN_SCIF3_PTV_RTS, GPIO_FN_SCIF3_PTV_CTS,
+
+	/* SCIF4 (SCIFA: 3 pin PTE/PTN) */
+	GPIO_FN_SCIF4_PTE_TXD, GPIO_FN_SCIF4_PTE_RXD, GPIO_FN_SCIF4_PTE_SCK,
+	GPIO_FN_SCIF4_PTN_TXD, GPIO_FN_SCIF4_PTN_RXD, GPIO_FN_SCIF4_PTN_SCK,
+
+	/* SCIF5 (SCIFA: 3 pin PTE/PTN) */
+	GPIO_FN_SCIF5_PTE_TXD, GPIO_FN_SCIF5_PTE_RXD, GPIO_FN_SCIF5_PTE_SCK,
+	GPIO_FN_SCIF5_PTN_TXD, GPIO_FN_SCIF5_PTN_RXD, GPIO_FN_SCIF5_PTN_SCK,
+
+	/* CEU */
+	GPIO_FN_VIO_D15, GPIO_FN_VIO_D14, GPIO_FN_VIO_D13, GPIO_FN_VIO_D12,
+	GPIO_FN_VIO_D11, GPIO_FN_VIO_D10, GPIO_FN_VIO_D9, GPIO_FN_VIO_D8,
+	GPIO_FN_VIO_D7, GPIO_FN_VIO_D6, GPIO_FN_VIO_D5, GPIO_FN_VIO_D4,
+	GPIO_FN_VIO_D3, GPIO_FN_VIO_D2, GPIO_FN_VIO_D1, GPIO_FN_VIO_D0,
+	GPIO_FN_VIO_FLD, GPIO_FN_VIO_CKO,
+	GPIO_FN_VIO_VD1, GPIO_FN_VIO_HD1, GPIO_FN_VIO_CLK1,
+	GPIO_FN_VIO_VD2, GPIO_FN_VIO_HD2, GPIO_FN_VIO_CLK2,
+
+	/* LCDC */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9, GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7, GPIO_FN_LCDD6, GPIO_FN_LCDD5, GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3, GPIO_FN_LCDD2, GPIO_FN_LCDD1, GPIO_FN_LCDD0,
+	GPIO_FN_LCDLCLK_PTR, GPIO_FN_LCDLCLK_PTW,
+	/* Main LCD */
+	GPIO_FN_LCDDON, GPIO_FN_LCDVCPWC, GPIO_FN_LCDVEPWC, GPIO_FN_LCDVSYN,
+	/* Main LCD - RGB Mode */
+	GPIO_FN_LCDDCK, GPIO_FN_LCDHSYN, GPIO_FN_LCDDISP,
+	/* Main LCD - SYS Mode */
+	GPIO_FN_LCDRS, GPIO_FN_LCDCS, GPIO_FN_LCDWR, GPIO_FN_LCDRD,
+
+	/* IRQ */
+	GPIO_FN_IRQ0, GPIO_FN_IRQ1, GPIO_FN_IRQ2, GPIO_FN_IRQ3,
+	GPIO_FN_IRQ4, GPIO_FN_IRQ5, GPIO_FN_IRQ6, GPIO_FN_IRQ7,
+
+	/* AUD */
+	GPIO_FN_AUDATA3, GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+	GPIO_FN_AUDCK, GPIO_FN_AUDSYNC,
+
+	/* SDHI0 (PTD) */
+	GPIO_FN_SDHI0CD_PTD, GPIO_FN_SDHI0WP_PTD,
+	GPIO_FN_SDHI0D3_PTD, GPIO_FN_SDHI0D2_PTD,
+	GPIO_FN_SDHI0D1_PTD, GPIO_FN_SDHI0D0_PTD,
+	GPIO_FN_SDHI0CMD_PTD, GPIO_FN_SDHI0CLK_PTD,
+
+	/* SDHI0 (PTS) */
+	GPIO_FN_SDHI0CD_PTS, GPIO_FN_SDHI0WP_PTS,
+	GPIO_FN_SDHI0D3_PTS, GPIO_FN_SDHI0D2_PTS,
+	GPIO_FN_SDHI0D1_PTS, GPIO_FN_SDHI0D0_PTS,
+	GPIO_FN_SDHI0CMD_PTS, GPIO_FN_SDHI0CLK_PTS,
+
+	/* SDHI1 */
+	GPIO_FN_SDHI1CD, GPIO_FN_SDHI1WP, GPIO_FN_SDHI1D3, GPIO_FN_SDHI1D2,
+	GPIO_FN_SDHI1D1, GPIO_FN_SDHI1D0, GPIO_FN_SDHI1CMD, GPIO_FN_SDHI1CLK,
+
+	/* SIUA - NOTE(review): SIUOSPD has no 'A', unlike its neighbours; confirm the name is intended */
+	GPIO_FN_SIUAFCK, GPIO_FN_SIUAILR, GPIO_FN_SIUAIBT, GPIO_FN_SIUAISLD,
+	GPIO_FN_SIUAOLR, GPIO_FN_SIUAOBT, GPIO_FN_SIUAOSLD, GPIO_FN_SIUAMCK,
+	GPIO_FN_SIUAISPD, GPIO_FN_SIUOSPD,
+
+	/* SIUB */
+	GPIO_FN_SIUBFCK, GPIO_FN_SIUBILR, GPIO_FN_SIUBIBT, GPIO_FN_SIUBISLD,
+	GPIO_FN_SIUBOLR, GPIO_FN_SIUBOBT, GPIO_FN_SIUBOSLD, GPIO_FN_SIUBMCK,
+
+	/* IRDA */
+	GPIO_FN_IRDA_IN, GPIO_FN_IRDA_OUT,
+
+	/* VOU */
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_HSYNC, GPIO_FN_DV_VSYNC,
+	GPIO_FN_DV_D15, GPIO_FN_DV_D14, GPIO_FN_DV_D13, GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11, GPIO_FN_DV_D10, GPIO_FN_DV_D9, GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7, GPIO_FN_DV_D6, GPIO_FN_DV_D5, GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3, GPIO_FN_DV_D2, GPIO_FN_DV_D1, GPIO_FN_DV_D0,
+
+	/* KEYSC */
+	GPIO_FN_KEYIN0, GPIO_FN_KEYIN1, GPIO_FN_KEYIN2, GPIO_FN_KEYIN3,
+	GPIO_FN_KEYIN4, GPIO_FN_KEYOUT0, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT2,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5,
+
+	/* MSIOF0 (PTF) */
+	GPIO_FN_MSIOF0_PTF_TXD, GPIO_FN_MSIOF0_PTF_RXD, GPIO_FN_MSIOF0_PTF_MCK,
+	GPIO_FN_MSIOF0_PTF_TSYNC, GPIO_FN_MSIOF0_PTF_TSCK,
+	GPIO_FN_MSIOF0_PTF_RSYNC, GPIO_FN_MSIOF0_PTF_RSCK,
+	GPIO_FN_MSIOF0_PTF_SS1, GPIO_FN_MSIOF0_PTF_SS2,
+
+	/* MSIOF0 (PTT+PTX) */
+	GPIO_FN_MSIOF0_PTT_TXD, GPIO_FN_MSIOF0_PTT_RXD, GPIO_FN_MSIOF0_PTX_MCK,
+	GPIO_FN_MSIOF0_PTT_TSYNC, GPIO_FN_MSIOF0_PTT_TSCK,
+	GPIO_FN_MSIOF0_PTT_RSYNC, GPIO_FN_MSIOF0_PTT_RSCK,
+	GPIO_FN_MSIOF0_PTT_SS1, GPIO_FN_MSIOF0_PTT_SS2,
+
+	/* MSIOF1 */
+	GPIO_FN_MSIOF1_TXD, GPIO_FN_MSIOF1_RXD, GPIO_FN_MSIOF1_MCK,
+	GPIO_FN_MSIOF1_TSYNC, GPIO_FN_MSIOF1_TSCK,
+	GPIO_FN_MSIOF1_RSYNC, GPIO_FN_MSIOF1_RSCK,
+	GPIO_FN_MSIOF1_SS1, GPIO_FN_MSIOF1_SS2,
+
+	/* TSIF */
+	GPIO_FN_TS0_SDAT, GPIO_FN_TS0_SCK, GPIO_FN_TS0_SDEN, GPIO_FN_TS0_SPSYNC,
+
+	/* FLCTL */
+	GPIO_FN_FCE, GPIO_FN_NAF7, GPIO_FN_NAF6, GPIO_FN_NAF5, GPIO_FN_NAF4,
+	GPIO_FN_NAF3, GPIO_FN_NAF2, GPIO_FN_NAF1, GPIO_FN_NAF0, GPIO_FN_FCDE,
+	GPIO_FN_FOE, GPIO_FN_FSC, GPIO_FN_FWE, GPIO_FN_FRB,
+
+	/* DMAC */
+	GPIO_FN_DACK1, GPIO_FN_DREQ1, GPIO_FN_DACK0, GPIO_FN_DREQ0,
+
+	/* ADC */
+	GPIO_FN_AN3, GPIO_FN_AN2, GPIO_FN_AN1, GPIO_FN_AN0, GPIO_FN_ADTRG,
+
+	/* CPG */
+	GPIO_FN_STATUS0, GPIO_FN_PDSTATUS,
+
+	/* TPU */
+	GPIO_FN_TPUTO3, GPIO_FN_TPUTO2, GPIO_FN_TPUTO1, GPIO_FN_TPUTO0,
+
+	/* BSC */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_IOIS16, GPIO_FN_WAIT, GPIO_FN_BS,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_CS6B_CE1B, GPIO_FN_CS6A_CE2B,
+	GPIO_FN_CS5B_CE1A, GPIO_FN_CS5A_CE2A,
+	GPIO_FN_WE3_ICIOWR, GPIO_FN_WE2_ICIORD,
+
+	/* ATAPI */
+	GPIO_FN_IDED15, GPIO_FN_IDED14, GPIO_FN_IDED13, GPIO_FN_IDED12,
+	GPIO_FN_IDED11, GPIO_FN_IDED10, GPIO_FN_IDED9, GPIO_FN_IDED8,
+	GPIO_FN_IDED7, GPIO_FN_IDED6, GPIO_FN_IDED5, GPIO_FN_IDED4,
+	GPIO_FN_IDED3, GPIO_FN_IDED2, GPIO_FN_IDED1, GPIO_FN_IDED0,
+	GPIO_FN_DIRECTION, GPIO_FN_EXBUF_ENB, GPIO_FN_IDERST, GPIO_FN_IODACK,
+	GPIO_FN_IODREQ, GPIO_FN_IDEIORDY, GPIO_FN_IDEINT, GPIO_FN_IDEIOWR,
+	GPIO_FN_IDEIORD, GPIO_FN_IDECS1, GPIO_FN_IDECS0, GPIO_FN_IDEA2,
+	GPIO_FN_IDEA1, GPIO_FN_IDEA0,
+};
+
+#endif /* __ASM_SH7723_H__ */
diff --git a/arch/sh/include/asm/edosk7705.h b/arch/sh/include/mach-common/mach/edosk7705.h
index 5bdc9d9be3d..5bdc9d9be3d 100644
--- a/arch/sh/include/asm/edosk7705.h
+++ b/arch/sh/include/mach-common/mach/edosk7705.h
diff --git a/arch/sh/include/asm/r7780rp.h b/arch/sh/include/mach-common/mach/highlander.h
index 306f7359f7d..306f7359f7d 100644
--- a/arch/sh/include/asm/r7780rp.h
+++ b/arch/sh/include/mach-common/mach/highlander.h
diff --git a/arch/sh/include/asm/hp6xx.h b/arch/sh/include/mach-common/mach/hp6xx.h
index 0d4165a32dc..0d4165a32dc 100644
--- a/arch/sh/include/asm/hp6xx.h
+++ b/arch/sh/include/mach-common/mach/hp6xx.h
diff --git a/arch/sh/include/asm/lboxre2.h b/arch/sh/include/mach-common/mach/lboxre2.h
index e6d16050492..e6d16050492 100644
--- a/arch/sh/include/asm/lboxre2.h
+++ b/arch/sh/include/mach-common/mach/lboxre2.h
diff --git a/arch/sh/include/asm/magicpanelr2.h b/arch/sh/include/mach-common/mach/magicpanelr2.h
index c644a77ee35..c644a77ee35 100644
--- a/arch/sh/include/asm/magicpanelr2.h
+++ b/arch/sh/include/mach-common/mach/magicpanelr2.h
diff --git a/arch/sh/include/asm/microdev.h b/arch/sh/include/mach-common/mach/microdev.h
index 1aed15856e1..1aed15856e1 100644
--- a/arch/sh/include/asm/microdev.h
+++ b/arch/sh/include/mach-common/mach/microdev.h
diff --git a/arch/sh/include/asm/migor.h b/arch/sh/include/mach-common/mach/migor.h
index c12b632c540..e451f0229e0 100644
--- a/arch/sh/include/asm/migor.h
+++ b/arch/sh/include/mach-common/mach/migor.h
@@ -52,9 +52,11 @@
 #define PORT_HIZCRB 0xa405015a
 #define PORT_HIZCRC 0xa405015c
 
+#define BSC_CS4BCR 0xfec10010
 #define BSC_CS6ABCR 0xfec1001c
+#define BSC_CS4WCR 0xfec10030
 
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 
 int migor_lcd_qvga_setup(void *board_data, void *sys_ops_handle,
 			 struct sh_mobile_lcdc_sys_bus_ops *sys_ops);
diff --git a/arch/sh/include/asm/rts7751r2d.h b/arch/sh/include/mach-common/mach/r2d.h
index 0a800157b82..0a800157b82 100644
--- a/arch/sh/include/asm/rts7751r2d.h
+++ b/arch/sh/include/mach-common/mach/r2d.h
diff --git a/arch/sh/include/asm/sdk7780.h b/arch/sh/include/mach-common/mach/sdk7780.h
index 697dc865f21..697dc865f21 100644
--- a/arch/sh/include/asm/sdk7780.h
+++ b/arch/sh/include/mach-common/mach/sdk7780.h
diff --git a/arch/sh/include/asm/sh7763rdp.h b/arch/sh/include/mach-common/mach/sh7763rdp.h
index 8750cc85297..8750cc85297 100644
--- a/arch/sh/include/asm/sh7763rdp.h
+++ b/arch/sh/include/mach-common/mach/sh7763rdp.h
diff --git a/arch/sh/include/asm/sh7785lcr.h b/arch/sh/include/mach-common/mach/sh7785lcr.h
index 1ce27d5c749..1ce27d5c749 100644
--- a/arch/sh/include/asm/sh7785lcr.h
+++ b/arch/sh/include/mach-common/mach/sh7785lcr.h
diff --git a/arch/sh/include/asm/shmin.h b/arch/sh/include/mach-common/mach/shmin.h
index 36ba138a81f..36ba138a81f 100644
--- a/arch/sh/include/asm/shmin.h
+++ b/arch/sh/include/mach-common/mach/shmin.h
diff --git a/arch/sh/include/asm/snapgear.h b/arch/sh/include/mach-common/mach/snapgear.h
index 042d95f51c4..042d95f51c4 100644
--- a/arch/sh/include/asm/snapgear.h
+++ b/arch/sh/include/mach-common/mach/snapgear.h
diff --git a/arch/sh/include/asm/systemh7751.h b/arch/sh/include/mach-common/mach/systemh7751.h
index 4161122c84e..4161122c84e 100644
--- a/arch/sh/include/asm/systemh7751.h
+++ b/arch/sh/include/mach-common/mach/systemh7751.h
diff --git a/arch/sh/include/asm/titan.h b/arch/sh/include/mach-common/mach/titan.h
index 03f3583c891..03f3583c891 100644
--- a/arch/sh/include/asm/titan.h
+++ b/arch/sh/include/mach-common/mach/titan.h
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 0e6905fe9fe..48edfb145fb 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -21,7 +21,8 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_ELF_CORE)		+= dump_task.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index 6edf53b93d9..c97660b2b48 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -17,7 +17,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_BINFMT_ELF)	+= dump_task.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
+obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index f5eb56e6bc5..b7e46d5bba4 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -294,9 +294,10 @@ arch_init_clk_ops(struct clk_ops **ops, int type)
 {
 }
 
-void __init __attribute__ ((weak))
+int __init __attribute__ ((weak))
 arch_clk_init(void)
 {
+	return 0;
 }
 
 static int show_clocks(char *buf, char **start, off_t off,
@@ -331,7 +332,7 @@ int __init clk_init(void)
 		ret |= clk_register(clk);
 	}
 
-	arch_clk_init();
+	ret |= arch_clk_init();
 
 	/* Kick the child clocks.. */
 	propagate_rate(&master_clk);
diff --git a/arch/sh/kernel/cpu/irq/Makefile b/arch/sh/kernel/cpu/irq/Makefile
index 462a8f6dfee..f0c7025a67d 100644
--- a/arch/sh/kernel/cpu/irq/Makefile
+++ b/arch/sh/kernel/cpu/irq/Makefile
@@ -1,8 +1,6 @@
 #
 # Makefile for the Linux/SuperH CPU-specifc IRQ handlers.
 #
-obj-y	+= intc.o
-
 obj-$(CONFIG_SUPERH32)			+= imask.o
 obj-$(CONFIG_CPU_SH5)			+= intc-sh5.o
 obj-$(CONFIG_CPU_HAS_IPR_IRQ)		+= ipr.o
diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 56ea7b269b5..3eb17ee5540 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -33,7 +33,7 @@ static void disable_ipr_irq(unsigned int irq)
 	struct ipr_data *p = get_irq_chip_data(irq);
 	unsigned long addr = get_ipr_desc(irq)->ipr_offsets[p->ipr_idx];
 	/* Set the priority in IPR to 0 */
-	ctrl_outw(ctrl_inw(addr) & (0xffff ^ (0xf << p->shift)), addr);
+	__raw_writew(__raw_readw(addr) & (0xffff ^ (0xf << p->shift)), addr);
 }
 
 static void enable_ipr_irq(unsigned int irq)
@@ -41,7 +41,7 @@ static void enable_ipr_irq(unsigned int irq)
 	struct ipr_data *p = get_irq_chip_data(irq);
 	unsigned long addr = get_ipr_desc(irq)->ipr_offsets[p->ipr_idx];
 	/* Set priority in IPR back to original value */
-	ctrl_outw(ctrl_inw(addr) | (p->priority << p->shift), addr);
+	__raw_writew(__raw_readw(addr) | (p->priority << p->shift), addr);
 }
 
 /*
diff --git a/arch/sh/kernel/cpu/sh2a/Makefile b/arch/sh/kernel/cpu/sh2a/Makefile
index 1ab1ecf4c76..428450cc080 100644
--- a/arch/sh/kernel/cpu/sh2a/Makefile
+++ b/arch/sh/kernel/cpu/sh2a/Makefile
@@ -12,3 +12,8 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7206)	+= setup-sh7206.o clock-sh7206.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7203)	+= setup-sh7203.o clock-sh7203.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7263)	+= setup-sh7203.o clock-sh7203.o
 obj-$(CONFIG_CPU_SUBTYPE_MXG)		+= setup-mxg.o clock-sh7206.o
+
+# Pinmux setup
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7203)	:= pinmux-sh7203.o
+
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c b/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c
new file mode 100644
index 00000000000..39a5b880418
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh2a/pinmux-sh7203.c
@@ -0,0 +1,1599 @@
+/*
+ * SH7203 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <asm/sh7203.h>
+
+enum {	/* SH7203 pinmux ids; each BEGIN/END pair brackets one id class */
+	PINMUX_RESERVED = 0,	/* zero stays outside every range below */
+
+	PINMUX_DATA_BEGIN,	/* one <port><bit>_DATA id per pin */
+	PA7_DATA, PA6_DATA, PA5_DATA, PA4_DATA,
+	PA3_DATA, PA2_DATA, PA1_DATA, PA0_DATA,
+	PB12_DATA,
+	PB11_DATA, PB10_DATA, PB9_DATA, PB8_DATA,
+	PB7_DATA, PB6_DATA, PB5_DATA, PB4_DATA,
+	PB3_DATA, PB2_DATA, PB1_DATA, PB0_DATA,
+	PC14_DATA, PC13_DATA, PC12_DATA,
+	PC11_DATA, PC10_DATA, PC9_DATA, PC8_DATA,
+	PC7_DATA, PC6_DATA, PC5_DATA, PC4_DATA,
+	PC3_DATA, PC2_DATA, PC1_DATA, PC0_DATA,
+	PD15_DATA, PD14_DATA, PD13_DATA, PD12_DATA,
+	PD11_DATA, PD10_DATA, PD9_DATA, PD8_DATA,
+	PD7_DATA, PD6_DATA, PD5_DATA, PD4_DATA,
+	PD3_DATA, PD2_DATA, PD1_DATA, PD0_DATA,
+	PE15_DATA, PE14_DATA, PE13_DATA, PE12_DATA,
+	PE11_DATA, PE10_DATA, PE9_DATA, PE8_DATA,
+	PE7_DATA, PE6_DATA, PE5_DATA, PE4_DATA,
+	PE3_DATA, PE2_DATA, PE1_DATA, PE0_DATA,
+	PF30_DATA, PF29_DATA, PF28_DATA,
+	PF27_DATA, PF26_DATA, PF25_DATA, PF24_DATA,
+	PF23_DATA, PF22_DATA, PF21_DATA, PF20_DATA,
+	PF19_DATA, PF18_DATA, PF17_DATA, PF16_DATA,
+	PF15_DATA, PF14_DATA, PF13_DATA, PF12_DATA,
+	PF11_DATA, PF10_DATA, PF9_DATA, PF8_DATA,
+	PF7_DATA, PF6_DATA, PF5_DATA, PF4_DATA,
+	PF3_DATA, PF2_DATA, PF1_DATA, PF0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,	/* <pin>_IN ids; PB12 has none */
+	PA7_IN, PA6_IN, PA5_IN, PA4_IN,
+	PA3_IN, PA2_IN, PA1_IN, PA0_IN,
+	PB11_IN, PB10_IN, PB9_IN, PB8_IN,
+	PB7_IN, PB6_IN, PB5_IN, PB4_IN,
+	PB3_IN, PB2_IN, PB1_IN, PB0_IN,
+	PC14_IN, PC13_IN, PC12_IN,
+	PC11_IN, PC10_IN, PC9_IN, PC8_IN,
+	PC7_IN, PC6_IN, PC5_IN, PC4_IN,
+	PC3_IN, PC2_IN, PC1_IN, PC0_IN,
+	PD15_IN, PD14_IN, PD13_IN, PD12_IN,
+	PD11_IN, PD10_IN, PD9_IN, PD8_IN,
+	PD7_IN, PD6_IN, PD5_IN, PD4_IN,
+	PD3_IN, PD2_IN, PD1_IN, PD0_IN,
+	PE15_IN, PE14_IN, PE13_IN, PE12_IN,
+	PE11_IN, PE10_IN, PE9_IN, PE8_IN,
+	PE7_IN, PE6_IN, PE5_IN, PE4_IN,
+	PE3_IN, PE2_IN, PE1_IN, PE0_IN,
+	PF30_IN, PF29_IN, PF28_IN,
+	PF27_IN, PF26_IN, PF25_IN, PF24_IN,
+	PF23_IN, PF22_IN, PF21_IN, PF20_IN,
+	PF19_IN, PF18_IN, PF17_IN, PF16_IN,
+	PF15_IN, PF14_IN, PF13_IN, PF12_IN,
+	PF11_IN, PF10_IN, PF9_IN, PF8_IN,
+	PF7_IN, PF6_IN, PF5_IN, PF4_IN,
+	PF3_IN, PF2_IN, PF1_IN, PF0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_OUTPUT_BEGIN,	/* <pin>_OUT ids; none for PA and PB7-PB0 */
+	PB12_OUT,
+	PB11_OUT, PB10_OUT, PB9_OUT, PB8_OUT,
+	PC14_OUT, PC13_OUT, PC12_OUT,
+	PC11_OUT, PC10_OUT, PC9_OUT, PC8_OUT,
+	PC7_OUT, PC6_OUT, PC5_OUT, PC4_OUT,
+	PC3_OUT, PC2_OUT, PC1_OUT, PC0_OUT,
+	PD15_OUT, PD14_OUT, PD13_OUT, PD12_OUT,
+	PD11_OUT, PD10_OUT, PD9_OUT, PD8_OUT,
+	PD7_OUT, PD6_OUT, PD5_OUT, PD4_OUT,
+	PD3_OUT, PD2_OUT, PD1_OUT, PD0_OUT,
+	PE15_OUT, PE14_OUT, PE13_OUT, PE12_OUT,
+	PE11_OUT, PE10_OUT, PE9_OUT, PE8_OUT,
+	PE7_OUT, PE6_OUT, PE5_OUT, PE4_OUT,
+	PE3_OUT, PE2_OUT, PE1_OUT, PE0_OUT,
+	PF30_OUT, PF29_OUT, PF28_OUT,
+	PF27_OUT, PF26_OUT, PF25_OUT, PF24_OUT,
+	PF23_OUT, PF22_OUT, PF21_OUT, PF20_OUT,
+	PF19_OUT, PF18_OUT, PF17_OUT, PF16_OUT,
+	PF15_OUT, PF14_OUT, PF13_OUT, PF12_OUT,
+	PF11_OUT, PF10_OUT, PF9_OUT, PF8_OUT,
+	PF7_OUT, PF6_OUT, PF5_OUT, PF4_OUT,
+	PF3_OUT, PF2_OUT, PF1_OUT, PF0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,	/* IOR/MD/IRQ selector ids; suffix presumably the field value (confirm) */
+	PB11_IOR_IN, PB11_IOR_OUT,
+	PB10_IOR_IN, PB10_IOR_OUT,
+	PB9_IOR_IN, PB9_IOR_OUT,
+	PB8_IOR_IN, PB8_IOR_OUT,
+	PB12MD_00, PB12MD_01, PB12MD_10, PB12MD_11,
+	PB11MD_0, PB11MD_1,
+	PB10MD_0, PB10MD_1,
+	PB9MD_00, PB9MD_01, PB9MD_10,
+	PB8MD_00, PB8MD_01, PB8MD_10,
+	PB7MD_00, PB7MD_01, PB7MD_10, PB7MD_11,
+	PB6MD_00, PB6MD_01, PB6MD_10, PB6MD_11,
+	PB5MD_00, PB5MD_01, PB5MD_10, PB5MD_11,
+	PB4MD_00, PB4MD_01, PB4MD_10, PB4MD_11,
+	PB3MD_00, PB3MD_01, PB3MD_10, PB3MD_11,
+	PB2MD_00, PB2MD_01, PB2MD_10, PB2MD_11,
+	PB1MD_00, PB1MD_01, PB1MD_10, PB1MD_11,
+	PB0MD_00, PB0MD_01, PB0MD_10, PB0MD_11,
+
+	PB12IRQ_00, PB12IRQ_01, PB12IRQ_10,
+
+	PC14MD_0, PC14MD_1,
+	PC13MD_0, PC13MD_1,
+	PC12MD_0, PC12MD_1,
+	PC11MD_00, PC11MD_01, PC11MD_10,
+	PC10MD_00, PC10MD_01, PC10MD_10,
+	PC9MD_0, PC9MD_1,
+	PC8MD_0, PC8MD_1,
+	PC7MD_0, PC7MD_1,
+	PC6MD_0, PC6MD_1,
+	PC5MD_0, PC5MD_1,
+	PC4MD_0, PC4MD_1,
+	PC3MD_0, PC3MD_1,
+	PC2MD_0, PC2MD_1,
+	PC1MD_0, PC1MD_1,
+	PC0MD_00, PC0MD_01, PC0MD_10,
+
+	PD15MD_000, PD15MD_001, PD15MD_010, PD15MD_100, PD15MD_101,
+	PD14MD_000, PD14MD_001, PD14MD_010, PD14MD_101,
+	PD13MD_000, PD13MD_001, PD13MD_010, PD13MD_100, PD13MD_101,
+	PD12MD_000, PD12MD_001, PD12MD_010, PD12MD_100, PD12MD_101,
+	PD11MD_000, PD11MD_001, PD11MD_010, PD11MD_100, PD11MD_101,
+	PD10MD_000, PD10MD_001, PD10MD_010, PD10MD_100, PD10MD_101,
+	PD9MD_000, PD9MD_001, PD9MD_010, PD9MD_100, PD9MD_101,
+	PD8MD_000, PD8MD_001, PD8MD_010, PD8MD_100, PD8MD_101,
+	PD7MD_000, PD7MD_001, PD7MD_010, PD7MD_011, PD7MD_100, PD7MD_101,
+	PD6MD_000, PD6MD_001, PD6MD_010, PD6MD_011, PD6MD_100, PD6MD_101,
+	PD5MD_000, PD5MD_001, PD5MD_010, PD5MD_011, PD5MD_100, PD5MD_101,
+	PD4MD_000, PD4MD_001, PD4MD_010, PD4MD_011, PD4MD_100, PD4MD_101,
+	PD3MD_000, PD3MD_001, PD3MD_010, PD3MD_011, PD3MD_100, PD3MD_101,
+	PD2MD_000, PD2MD_001, PD2MD_010, PD2MD_011, PD2MD_100, PD2MD_101,
+	PD1MD_000, PD1MD_001, PD1MD_010, PD1MD_011, PD1MD_100, PD1MD_101,
+	PD0MD_000, PD0MD_001, PD0MD_010, PD0MD_011, PD0MD_100, PD0MD_101,
+
+	PE15MD_00, PE15MD_01, PE15MD_11,
+	PE14MD_00, PE14MD_01, PE14MD_11,
+	PE13MD_00, PE13MD_11,
+	PE12MD_00, PE12MD_11,
+	PE11MD_000, PE11MD_001, PE11MD_010, PE11MD_100,
+	PE10MD_000, PE10MD_001, PE10MD_010, PE10MD_100,
+	PE9MD_00, PE9MD_01, PE9MD_10, PE9MD_11,
+	PE8MD_00, PE8MD_01, PE8MD_10, PE8MD_11,
+	PE7MD_000, PE7MD_001, PE7MD_010, PE7MD_011, PE7MD_100,
+	PE6MD_000, PE6MD_001, PE6MD_010, PE6MD_011, PE6MD_100,
+	PE5MD_000, PE5MD_001, PE5MD_010, PE5MD_011, PE5MD_100,
+	PE4MD_000, PE4MD_001, PE4MD_010, PE4MD_011, PE4MD_100,
+	PE3MD_00, PE3MD_01, PE3MD_11,
+	PE2MD_00, PE2MD_01, PE2MD_11,
+	PE1MD_00, PE1MD_01, PE1MD_10, PE1MD_11,
+	PE0MD_000, PE0MD_001, PE0MD_011, PE0MD_100,
+
+	PF30MD_0, PF30MD_1,
+	PF29MD_0, PF29MD_1,
+	PF28MD_0, PF28MD_1,
+	PF27MD_0, PF27MD_1,
+	PF26MD_0, PF26MD_1,
+	PF25MD_0, PF25MD_1,
+	PF24MD_0, PF24MD_1,
+	PF23MD_00, PF23MD_01, PF23MD_10,
+	PF22MD_00, PF22MD_01, PF22MD_10,
+	PF21MD_00, PF21MD_01, PF21MD_10,
+	PF20MD_00, PF20MD_01, PF20MD_10,
+	PF19MD_00, PF19MD_01, PF19MD_10,
+	PF18MD_00, PF18MD_01, PF18MD_10,
+	PF17MD_00, PF17MD_01, PF17MD_10,
+	PF16MD_00, PF16MD_01, PF16MD_10,
+	PF15MD_00, PF15MD_01, PF15MD_10,
+	PF14MD_00, PF14MD_01, PF14MD_10,
+	PF13MD_00, PF13MD_01, PF13MD_10,
+	PF12MD_00, PF12MD_01, PF12MD_10,
+	PF11MD_00, PF11MD_01, PF11MD_10,
+	PF10MD_00, PF10MD_01, PF10MD_10,
+	PF9MD_00, PF9MD_01, PF9MD_10,
+	PF8MD_00, PF8MD_01, PF8MD_10,
+	PF7MD_00, PF7MD_01, PF7MD_10, PF7MD_11,
+	PF6MD_00, PF6MD_01, PF6MD_10, PF6MD_11,
+	PF5MD_00, PF5MD_01, PF5MD_10, PF5MD_11,
+	PF4MD_00, PF4MD_01, PF4MD_10, PF4MD_11,
+	PF3MD_00, PF3MD_01, PF3MD_10, PF3MD_11,
+	PF2MD_00, PF2MD_01, PF2MD_10, PF2MD_11,
+	PF1MD_00, PF1MD_01, PF1MD_10, PF1MD_11,
+	PF0MD_00, PF0MD_01, PF0MD_10, PF0MD_11,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,	/* <signal>_MARK ids; a _PB/_PD/_PE/_PF suffix names the port carrying it */
+	PINT7_PB_MARK, PINT6_PB_MARK, PINT5_PB_MARK, PINT4_PB_MARK,
+	PINT3_PB_MARK, PINT2_PB_MARK, PINT1_PB_MARK, PINT0_PB_MARK,
+	PINT7_PD_MARK, PINT6_PD_MARK, PINT5_PD_MARK, PINT4_PD_MARK,
+	PINT3_PD_MARK, PINT2_PD_MARK, PINT1_PD_MARK, PINT0_PD_MARK,
+	IRQ7_PB_MARK, IRQ6_PB_MARK, IRQ5_PB_MARK, IRQ4_PB_MARK,
+	IRQ3_PB_MARK, IRQ2_PB_MARK, IRQ1_PB_MARK, IRQ0_PB_MARK,
+	IRQ7_PD_MARK, IRQ6_PD_MARK, IRQ5_PD_MARK, IRQ4_PD_MARK,
+	IRQ3_PD_MARK, IRQ2_PD_MARK, IRQ1_PD_MARK, IRQ0_PD_MARK,
+	IRQ7_PE_MARK, IRQ6_PE_MARK, IRQ5_PE_MARK, IRQ4_PE_MARK,
+	IRQ3_PE_MARK, IRQ2_PE_MARK, IRQ1_PE_MARK, IRQ0_PE_MARK,
+	WDTOVF_MARK, IRQOUT_MARK, REFOUT_MARK, IRQOUT_REFOUT_MARK,
+	UBCTRG_MARK,
+	CTX1_MARK, CRX1_MARK, CTX0_MARK, CTX0_CTX1_MARK,
+	CRX0_MARK, CRX0_CRX1_MARK,
+	SDA3_MARK, SCL3_MARK,
+	SDA2_MARK, SCL2_MARK,
+	SDA1_MARK, SCL1_MARK,
+	SDA0_MARK, SCL0_MARK,
+	TEND0_PD_MARK, TEND0_PE_MARK, DACK0_PD_MARK, DACK0_PE_MARK,
+	DREQ0_PD_MARK, DREQ0_PE_MARK, TEND1_PD_MARK, TEND1_PE_MARK,
+	DACK1_PD_MARK, DACK1_PE_MARK, DREQ1_PD_MARK, DREQ1_PE_MARK,
+	DACK2_MARK, DREQ2_MARK, DACK3_MARK, DREQ3_MARK,
+	ADTRG_PD_MARK, ADTRG_PE_MARK,
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	A21_MARK, CS4_MARK, MRES_MARK, BS_MARK,
+	IOIS16_MARK, CS1_MARK, CS6_CE1B_MARK, CE2B_MARK,
+	CS5_CE1A_MARK, CE2A_MARK, FRAME_MARK, WAIT_MARK,
+	RDWR_MARK, CKE_MARK, CASU_MARK,	BREQ_MARK,
+	RASU_MARK, BACK_MARK, CASL_MARK, RASL_MARK,
+	WE3_DQMUU_AH_ICIO_WR_MARK, WE2_DQMUL_ICIORD_MARK,
+	WE1_DQMLU_WE_MARK, WE0_DQMLL_MARK,
+	CS3_MARK, CS2_MARK, A1_MARK, A0_MARK, CS7_MARK,
+	TIOC4D_MARK, TIOC4C_MARK, TIOC4B_MARK, TIOC4A_MARK,
+	TIOC3D_MARK, TIOC3C_MARK, TIOC3B_MARK, TIOC3A_MARK,
+	TIOC2B_MARK, TIOC1B_MARK, TIOC2A_MARK, TIOC1A_MARK,
+	TIOC0D_MARK, TIOC0C_MARK, TIOC0B_MARK, TIOC0A_MARK,
+	TCLKD_PD_MARK, TCLKC_PD_MARK, TCLKB_PD_MARK, TCLKA_PD_MARK,
+	TCLKD_PF_MARK, TCLKC_PF_MARK, TCLKB_PF_MARK, TCLKA_PF_MARK,
+	SCS0_PD_MARK, SSO0_PD_MARK, SSI0_PD_MARK, SSCK0_PD_MARK,
+	SCS0_PF_MARK, SSO0_PF_MARK, SSI0_PF_MARK, SSCK0_PF_MARK,
+	SCS1_PD_MARK, SSO1_PD_MARK, SSI1_PD_MARK, SSCK1_PD_MARK,
+	SCS1_PF_MARK, SSO1_PF_MARK, SSI1_PF_MARK, SSCK1_PF_MARK,
+	TXD0_MARK, RXD0_MARK, SCK0_MARK,
+	TXD1_MARK, RXD1_MARK, SCK1_MARK,
+	TXD2_MARK, RXD2_MARK, SCK2_MARK,
+	RTS3_MARK, CTS3_MARK, TXD3_MARK,
+	RXD3_MARK, SCK3_MARK,
+	AUDIO_CLK_MARK,
+	SSIDATA3_MARK, SSIWS3_MARK, SSISCK3_MARK,
+	SSIDATA2_MARK, SSIWS2_MARK, SSISCK2_MARK,
+	SSIDATA1_MARK, SSIWS1_MARK, SSISCK1_MARK,
+	SSIDATA0_MARK, SSIWS0_MARK, SSISCK0_MARK,
+	FCE_MARK, FRB_MARK,
+	NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK,
+	FSC_MARK, FOE_MARK, FCDE_MARK, FWE_MARK,
+	LCD_VEPWC_MARK, LCD_VCPWC_MARK,	LCD_CLK_MARK, LCD_FLM_MARK,
+	LCD_M_DISP_MARK, LCD_CL2_MARK, LCD_CL1_MARK, LCD_DON_MARK,
+	LCD_DATA15_MARK, LCD_DATA14_MARK, LCD_DATA13_MARK, LCD_DATA12_MARK,
+	LCD_DATA11_MARK, LCD_DATA10_MARK, LCD_DATA9_MARK, LCD_DATA8_MARK,
+	LCD_DATA7_MARK, LCD_DATA6_MARK, LCD_DATA5_MARK, LCD_DATA4_MARK,
+	LCD_DATA3_MARK, LCD_DATA2_MARK, LCD_DATA1_MARK, LCD_DATA0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* Entry layout: a *_DATA or *_MARK id first, then its MD/IRQ/IN/OUT ids */
+	/* PA: no mode ids, input direction only */
+	PINMUX_DATA(PA7_DATA, PA7_IN),
+	PINMUX_DATA(PA6_DATA, PA6_IN),
+	PINMUX_DATA(PA5_DATA, PA5_IN),
+	PINMUX_DATA(PA4_DATA, PA4_IN),
+	PINMUX_DATA(PA3_DATA, PA3_IN),
+	PINMUX_DATA(PA2_DATA, PA2_IN),
+	PINMUX_DATA(PA1_DATA, PA1_IN),
+	PINMUX_DATA(PA0_DATA, PA0_IN),
+
+	/* PB: PB12 output only, PB11-PB8 in/out, PB7-PB0 input only */
+	PINMUX_DATA(PB12_DATA, PB12MD_00, PB12_OUT),
+	PINMUX_DATA(WDTOVF_MARK, PB12MD_01),
+	PINMUX_DATA(IRQOUT_MARK, PB12MD_10, PB12IRQ_00),
+	PINMUX_DATA(REFOUT_MARK, PB12MD_10, PB12IRQ_01),
+	PINMUX_DATA(IRQOUT_REFOUT_MARK, PB12MD_10, PB12IRQ_10),
+	PINMUX_DATA(UBCTRG_MARK, PB12MD_11),
+
+	PINMUX_DATA(PB11_DATA, PB11MD_0, PB11_IN, PB11_OUT),
+	PINMUX_DATA(CTX1_MARK, PB11MD_1),
+
+	PINMUX_DATA(PB10_DATA, PB10MD_0, PB10_IN, PB10_OUT),
+	PINMUX_DATA(CRX1_MARK, PB10MD_1),
+
+	PINMUX_DATA(PB9_DATA, PB9MD_00, PB9_IN, PB9_OUT),
+	PINMUX_DATA(CTX0_MARK, PB9MD_01),
+	PINMUX_DATA(CTX0_CTX1_MARK, PB9MD_10),
+
+	PINMUX_DATA(PB8_DATA, PB8MD_00, PB8_IN, PB8_OUT),
+	PINMUX_DATA(CRX0_MARK, PB8MD_01),
+	PINMUX_DATA(CRX0_CRX1_MARK, PB8MD_10),
+
+	PINMUX_DATA(PB7_DATA, PB7MD_00, PB7_IN),
+	PINMUX_DATA(SDA3_MARK, PB7MD_01),
+	PINMUX_DATA(PINT7_PB_MARK, PB7MD_10),
+	PINMUX_DATA(IRQ7_PB_MARK, PB7MD_11),
+
+	PINMUX_DATA(PB6_DATA, PB6MD_00, PB6_IN),
+	PINMUX_DATA(SCL3_MARK, PB6MD_01),
+	PINMUX_DATA(PINT6_PB_MARK, PB6MD_10),
+	PINMUX_DATA(IRQ6_PB_MARK, PB6MD_11),
+
+	PINMUX_DATA(PB5_DATA, PB5MD_00, PB5_IN),
+	PINMUX_DATA(SDA2_MARK, PB5MD_01),
+	PINMUX_DATA(PINT5_PB_MARK, PB5MD_10),
+	PINMUX_DATA(IRQ5_PB_MARK, PB5MD_11),
+
+	PINMUX_DATA(PB4_DATA, PB4MD_00, PB4_IN),
+	PINMUX_DATA(SCL2_MARK, PB4MD_01),
+	PINMUX_DATA(PINT4_PB_MARK, PB4MD_10),
+	PINMUX_DATA(IRQ4_PB_MARK, PB4MD_11),
+
+	PINMUX_DATA(PB3_DATA, PB3MD_00, PB3_IN),
+	PINMUX_DATA(SDA1_MARK, PB3MD_01),
+	PINMUX_DATA(PINT3_PB_MARK, PB3MD_10),
+	PINMUX_DATA(IRQ3_PB_MARK, PB3MD_11),
+
+	PINMUX_DATA(PB2_DATA, PB2MD_00, PB2_IN),
+	PINMUX_DATA(SCL1_MARK, PB2MD_01),
+	PINMUX_DATA(PINT2_PB_MARK, PB2MD_10),
+	PINMUX_DATA(IRQ2_PB_MARK, PB2MD_11),
+
+	PINMUX_DATA(PB1_DATA, PB1MD_00, PB1_IN),
+	PINMUX_DATA(SDA0_MARK, PB1MD_01),
+	PINMUX_DATA(PINT1_PB_MARK, PB1MD_10),
+	PINMUX_DATA(IRQ1_PB_MARK, PB1MD_11),
+
+	PINMUX_DATA(PB0_DATA, PB0MD_00, PB0_IN),
+	PINMUX_DATA(SCL0_MARK, PB0MD_01),
+	PINMUX_DATA(PINT0_PB_MARK, PB0MD_10),
+	PINMUX_DATA(IRQ0_PB_MARK, PB0MD_11),
+
+	/* PC */
+	PINMUX_DATA(PC14_DATA, PC14MD_0, PC14_IN, PC14_OUT),
+	PINMUX_DATA(WAIT_MARK, PC14MD_1),
+
+	PINMUX_DATA(PC13_DATA, PC13MD_0, PC13_IN, PC13_OUT),
+	PINMUX_DATA(RDWR_MARK, PC13MD_1),
+
+	PINMUX_DATA(PC12_DATA, PC12MD_0, PC12_IN, PC12_OUT),
+	PINMUX_DATA(CKE_MARK, PC12MD_1),
+
+	PINMUX_DATA(PC11_DATA, PC11MD_00, PC11_IN, PC11_OUT),
+	PINMUX_DATA(CASU_MARK, PC11MD_01),
+	PINMUX_DATA(BREQ_MARK, PC11MD_10),
+
+	PINMUX_DATA(PC10_DATA, PC10MD_00, PC10_IN, PC10_OUT),
+	PINMUX_DATA(RASU_MARK, PC10MD_01),
+	PINMUX_DATA(BACK_MARK, PC10MD_10),
+
+	PINMUX_DATA(PC9_DATA, PC9MD_0, PC9_IN, PC9_OUT),
+	PINMUX_DATA(CASL_MARK, PC9MD_1),
+
+	PINMUX_DATA(PC8_DATA, PC8MD_0, PC8_IN, PC8_OUT),
+	PINMUX_DATA(RASL_MARK, PC8MD_1),
+
+	PINMUX_DATA(PC7_DATA, PC7MD_0, PC7_IN, PC7_OUT),
+	PINMUX_DATA(WE3_DQMUU_AH_ICIO_WR_MARK, PC7MD_1),
+
+	PINMUX_DATA(PC6_DATA, PC6MD_0, PC6_IN, PC6_OUT),
+	PINMUX_DATA(WE2_DQMUL_ICIORD_MARK, PC6MD_1),
+
+	PINMUX_DATA(PC5_DATA, PC5MD_0, PC5_IN, PC5_OUT),
+	PINMUX_DATA(WE1_DQMLU_WE_MARK, PC5MD_1),
+
+	PINMUX_DATA(PC4_DATA, PC4MD_0, PC4_IN, PC4_OUT),
+	PINMUX_DATA(WE0_DQMLL_MARK, PC4MD_1),
+
+	PINMUX_DATA(PC3_DATA, PC3MD_0, PC3_IN, PC3_OUT),
+	PINMUX_DATA(CS3_MARK, PC3MD_1),
+
+	PINMUX_DATA(PC2_DATA, PC2MD_0, PC2_IN, PC2_OUT),
+	PINMUX_DATA(CS2_MARK, PC2MD_1),
+
+	PINMUX_DATA(PC1_DATA, PC1MD_0, PC1_IN, PC1_OUT),
+	PINMUX_DATA(A1_MARK, PC1MD_1),
+
+	PINMUX_DATA(PC0_DATA, PC0MD_00, PC0_IN, PC0_OUT),
+	PINMUX_DATA(A0_MARK, PC0MD_01),
+	PINMUX_DATA(CS7_MARK, PC0MD_10),
+
+	/* PD */
+	PINMUX_DATA(PD15_DATA, PD15MD_000, PD15_IN, PD15_OUT),
+	PINMUX_DATA(D31_MARK, PD15MD_001),
+	PINMUX_DATA(PINT7_PD_MARK, PD15MD_010),
+	PINMUX_DATA(ADTRG_PD_MARK, PD15MD_100),
+	PINMUX_DATA(TIOC4D_MARK, PD15MD_101),
+
+	PINMUX_DATA(PD14_DATA, PD14MD_000, PD14_IN, PD14_OUT),
+	PINMUX_DATA(D30_MARK, PD14MD_001),
+	PINMUX_DATA(PINT6_PD_MARK, PD14MD_010),
+	PINMUX_DATA(TIOC4C_MARK, PD14MD_101),
+
+	PINMUX_DATA(PD13_DATA, PD13MD_000, PD13_IN, PD13_OUT),
+	PINMUX_DATA(D29_MARK, PD13MD_001),
+	PINMUX_DATA(PINT5_PD_MARK, PD13MD_010),
+	PINMUX_DATA(TEND1_PD_MARK, PD13MD_100),
+	PINMUX_DATA(TIOC4B_MARK, PD13MD_101),
+
+	PINMUX_DATA(PD12_DATA, PD12MD_000, PD12_IN, PD12_OUT),
+	PINMUX_DATA(D28_MARK, PD12MD_001),
+	PINMUX_DATA(PINT4_PD_MARK, PD12MD_010),
+	PINMUX_DATA(DACK1_PD_MARK, PD12MD_100),
+	PINMUX_DATA(TIOC4A_MARK, PD12MD_101),
+
+	PINMUX_DATA(PD11_DATA, PD11MD_000, PD11_IN, PD11_OUT),
+	PINMUX_DATA(D27_MARK, PD11MD_001),
+	PINMUX_DATA(PINT3_PD_MARK, PD11MD_010),
+	PINMUX_DATA(DREQ1_PD_MARK, PD11MD_100),
+	PINMUX_DATA(TIOC3D_MARK, PD11MD_101),
+
+	PINMUX_DATA(PD10_DATA, PD10MD_000, PD10_IN, PD10_OUT),
+	PINMUX_DATA(D26_MARK, PD10MD_001),
+	PINMUX_DATA(PINT2_PD_MARK, PD10MD_010),
+	PINMUX_DATA(TEND0_PD_MARK, PD10MD_100),
+	PINMUX_DATA(TIOC3C_MARK, PD10MD_101),
+
+	PINMUX_DATA(PD9_DATA, PD9MD_000, PD9_IN, PD9_OUT),
+	PINMUX_DATA(D25_MARK, PD9MD_001),
+	PINMUX_DATA(PINT1_PD_MARK, PD9MD_010),
+	PINMUX_DATA(DACK0_PD_MARK, PD9MD_100),
+	PINMUX_DATA(TIOC3B_MARK, PD9MD_101),
+
+	PINMUX_DATA(PD8_DATA, PD8MD_000, PD8_IN, PD8_OUT),
+	PINMUX_DATA(D24_MARK, PD8MD_001),
+	PINMUX_DATA(PINT0_PD_MARK, PD8MD_010),
+	PINMUX_DATA(DREQ0_PD_MARK, PD8MD_100),
+	PINMUX_DATA(TIOC3A_MARK, PD8MD_101),
+
+	PINMUX_DATA(PD7_DATA, PD7MD_000, PD7_IN, PD7_OUT),
+	PINMUX_DATA(D23_MARK, PD7MD_001),
+	PINMUX_DATA(IRQ7_PD_MARK, PD7MD_010),
+	PINMUX_DATA(SCS1_PD_MARK, PD7MD_011),
+	PINMUX_DATA(TCLKD_PD_MARK, PD7MD_100),
+	PINMUX_DATA(TIOC2B_MARK, PD7MD_101),
+
+	PINMUX_DATA(PD6_DATA, PD6MD_000, PD6_IN, PD6_OUT),
+	PINMUX_DATA(D22_MARK, PD6MD_001),
+	PINMUX_DATA(IRQ6_PD_MARK, PD6MD_010),
+	PINMUX_DATA(SSO1_PD_MARK, PD6MD_011),
+	PINMUX_DATA(TCLKC_PD_MARK, PD6MD_100),
+	PINMUX_DATA(TIOC2A_MARK, PD6MD_101),
+
+	PINMUX_DATA(PD5_DATA, PD5MD_000, PD5_IN, PD5_OUT),
+	PINMUX_DATA(D21_MARK, PD5MD_001),
+	PINMUX_DATA(IRQ5_PD_MARK, PD5MD_010),
+	PINMUX_DATA(SSI1_PD_MARK, PD5MD_011),
+	PINMUX_DATA(TCLKB_PD_MARK, PD5MD_100),
+	PINMUX_DATA(TIOC1B_MARK, PD5MD_101),
+
+	PINMUX_DATA(PD4_DATA, PD4MD_000, PD4_IN, PD4_OUT),
+	PINMUX_DATA(D20_MARK, PD4MD_001),
+	PINMUX_DATA(IRQ4_PD_MARK, PD4MD_010),
+	PINMUX_DATA(SSCK1_PD_MARK, PD4MD_011),
+	PINMUX_DATA(TCLKA_PD_MARK, PD4MD_100),
+	PINMUX_DATA(TIOC1A_MARK, PD4MD_101),
+
+	PINMUX_DATA(PD3_DATA, PD3MD_000, PD3_IN, PD3_OUT),
+	PINMUX_DATA(D19_MARK, PD3MD_001),
+	PINMUX_DATA(IRQ3_PD_MARK, PD3MD_010),
+	PINMUX_DATA(SCS0_PD_MARK, PD3MD_011),
+	PINMUX_DATA(DACK3_MARK, PD3MD_100),
+	PINMUX_DATA(TIOC0D_MARK, PD3MD_101),
+
+	PINMUX_DATA(PD2_DATA, PD2MD_000, PD2_IN, PD2_OUT),
+	PINMUX_DATA(D18_MARK, PD2MD_001),
+	PINMUX_DATA(IRQ2_PD_MARK, PD2MD_010),
+	PINMUX_DATA(SSO0_PD_MARK, PD2MD_011),
+	PINMUX_DATA(DREQ3_MARK, PD2MD_100),
+	PINMUX_DATA(TIOC0C_MARK, PD2MD_101),
+
+	PINMUX_DATA(PD1_DATA, PD1MD_000, PD1_IN, PD1_OUT),
+	PINMUX_DATA(D17_MARK, PD1MD_001),
+	PINMUX_DATA(IRQ1_PD_MARK, PD1MD_010),
+	PINMUX_DATA(SSI0_PD_MARK, PD1MD_011),
+	PINMUX_DATA(DACK2_MARK, PD1MD_100),
+	PINMUX_DATA(TIOC0B_MARK, PD1MD_101),
+
+	PINMUX_DATA(PD0_DATA, PD0MD_000, PD0_IN, PD0_OUT),
+	PINMUX_DATA(D16_MARK, PD0MD_001),
+	PINMUX_DATA(IRQ0_PD_MARK, PD0MD_010),
+	PINMUX_DATA(SSCK0_PD_MARK, PD0MD_011),
+	PINMUX_DATA(DREQ2_MARK, PD0MD_100),
+	PINMUX_DATA(TIOC0A_MARK, PD0MD_101),
+
+	/* PE */
+	PINMUX_DATA(PE15_DATA, PE15MD_00, PE15_IN, PE15_OUT),
+	PINMUX_DATA(IOIS16_MARK, PE15MD_01),
+	PINMUX_DATA(RTS3_MARK, PE15MD_11),
+
+	PINMUX_DATA(PE14_DATA, PE14MD_00, PE14_IN, PE14_OUT),
+	PINMUX_DATA(CS1_MARK, PE14MD_01),
+	PINMUX_DATA(CTS3_MARK, PE14MD_11),
+
+	PINMUX_DATA(PE13_DATA, PE13MD_00, PE13_IN, PE13_OUT),
+	PINMUX_DATA(TXD3_MARK, PE13MD_11),
+
+	PINMUX_DATA(PE12_DATA, PE12MD_00, PE12_IN, PE12_OUT),
+	PINMUX_DATA(RXD3_MARK, PE12MD_11),
+
+	PINMUX_DATA(PE11_DATA, PE11MD_000, PE11_IN, PE11_OUT),
+	PINMUX_DATA(CS6_CE1B_MARK, PE11MD_001),
+	PINMUX_DATA(IRQ7_PE_MARK, PE11MD_010),
+	PINMUX_DATA(TEND1_PE_MARK, PE11MD_100),
+
+	PINMUX_DATA(PE10_DATA, PE10MD_000, PE10_IN, PE10_OUT),
+	PINMUX_DATA(CE2B_MARK, PE10MD_001),
+	PINMUX_DATA(IRQ6_PE_MARK, PE10MD_010),
+	PINMUX_DATA(TEND0_PE_MARK, PE10MD_100),
+
+	PINMUX_DATA(PE9_DATA, PE9MD_00, PE9_IN, PE9_OUT),
+	PINMUX_DATA(CS5_CE1A_MARK, PE9MD_01),
+	PINMUX_DATA(IRQ5_PE_MARK, PE9MD_10),
+	PINMUX_DATA(SCK3_MARK, PE9MD_11),
+
+	PINMUX_DATA(PE8_DATA, PE8MD_00, PE8_IN, PE8_OUT),
+	PINMUX_DATA(CE2A_MARK, PE8MD_01),
+	PINMUX_DATA(IRQ4_PE_MARK, PE8MD_10),
+	PINMUX_DATA(SCK2_MARK, PE8MD_11),
+
+	PINMUX_DATA(PE7_DATA, PE7MD_000, PE7_IN, PE7_OUT),
+	PINMUX_DATA(FRAME_MARK, PE7MD_001),
+	PINMUX_DATA(IRQ3_PE_MARK, PE7MD_010),
+	PINMUX_DATA(TXD2_MARK, PE7MD_011),
+	PINMUX_DATA(DACK1_PE_MARK, PE7MD_100),
+
+	PINMUX_DATA(PE6_DATA, PE6MD_000, PE6_IN, PE6_OUT),
+	PINMUX_DATA(A25_MARK, PE6MD_001),
+	PINMUX_DATA(IRQ2_PE_MARK, PE6MD_010),
+	PINMUX_DATA(RXD2_MARK, PE6MD_011),
+	PINMUX_DATA(DREQ1_PE_MARK, PE6MD_100),
+
+	PINMUX_DATA(PE5_DATA, PE5MD_000, PE5_IN, PE5_OUT),
+	PINMUX_DATA(A24_MARK, PE5MD_001),
+	PINMUX_DATA(IRQ1_PE_MARK, PE5MD_010),
+	PINMUX_DATA(TXD1_MARK, PE5MD_011),
+	PINMUX_DATA(DACK0_PE_MARK, PE5MD_100),
+
+	PINMUX_DATA(PE4_DATA, PE4MD_000, PE4_IN, PE4_OUT),
+	PINMUX_DATA(A23_MARK, PE4MD_001),
+	PINMUX_DATA(IRQ0_PE_MARK, PE4MD_010),
+	PINMUX_DATA(RXD1_MARK, PE4MD_011),
+	PINMUX_DATA(DREQ0_PE_MARK, PE4MD_100),
+
+	PINMUX_DATA(PE3_DATA, PE3MD_00, PE3_IN, PE3_OUT),
+	PINMUX_DATA(A22_MARK, PE3MD_01),
+	PINMUX_DATA(SCK1_MARK, PE3MD_11),
+
+	PINMUX_DATA(PE2_DATA, PE2MD_00, PE2_IN, PE2_OUT),
+	PINMUX_DATA(A21_MARK, PE2MD_01),
+	PINMUX_DATA(SCK0_MARK, PE2MD_11),
+
+	PINMUX_DATA(PE1_DATA, PE1MD_00, PE1_IN, PE1_OUT),
+	PINMUX_DATA(CS4_MARK, PE1MD_01),
+	PINMUX_DATA(MRES_MARK, PE1MD_10),
+	PINMUX_DATA(TXD0_MARK, PE1MD_11),
+
+	PINMUX_DATA(PE0_DATA, PE0MD_000, PE0_IN, PE0_OUT),
+	PINMUX_DATA(BS_MARK, PE0MD_001),
+	PINMUX_DATA(RXD0_MARK, PE0MD_011),
+	PINMUX_DATA(ADTRG_PE_MARK, PE0MD_100),
+
+	/* PF */
+	PINMUX_DATA(PF30_DATA, PF30MD_0, PF30_IN, PF30_OUT),
+	PINMUX_DATA(AUDIO_CLK_MARK, PF30MD_1),
+
+	PINMUX_DATA(PF29_DATA, PF29MD_0, PF29_IN, PF29_OUT),
+	PINMUX_DATA(SSIDATA3_MARK, PF29MD_1),
+
+	PINMUX_DATA(PF28_DATA, PF28MD_0, PF28_IN, PF28_OUT),
+	PINMUX_DATA(SSIWS3_MARK, PF28MD_1),
+
+	PINMUX_DATA(PF27_DATA, PF27MD_0, PF27_IN, PF27_OUT),
+	PINMUX_DATA(SSISCK3_MARK, PF27MD_1),
+
+	PINMUX_DATA(PF26_DATA, PF26MD_0, PF26_IN, PF26_OUT),
+	PINMUX_DATA(SSIDATA2_MARK, PF26MD_1),
+
+	PINMUX_DATA(PF25_DATA, PF25MD_0, PF25_IN, PF25_OUT),
+	PINMUX_DATA(SSIWS2_MARK, PF25MD_1),
+
+	PINMUX_DATA(PF24_DATA, PF24MD_0, PF24_IN, PF24_OUT),
+	PINMUX_DATA(SSISCK2_MARK, PF24MD_1),
+
+	PINMUX_DATA(PF23_DATA, PF23MD_00, PF23_IN, PF23_OUT),
+	PINMUX_DATA(SSIDATA1_MARK, PF23MD_01),
+	PINMUX_DATA(LCD_VEPWC_MARK, PF23MD_10),
+
+	PINMUX_DATA(PF22_DATA, PF22MD_00, PF22_IN, PF22_OUT),
+	PINMUX_DATA(SSIWS1_MARK, PF22MD_01),
+	PINMUX_DATA(LCD_VCPWC_MARK, PF22MD_10),
+
+	PINMUX_DATA(PF21_DATA, PF21MD_00, PF21_IN, PF21_OUT),
+	PINMUX_DATA(SSISCK1_MARK, PF21MD_01),
+	PINMUX_DATA(LCD_CLK_MARK, PF21MD_10),
+
+	PINMUX_DATA(PF20_DATA, PF20MD_00, PF20_IN, PF20_OUT),
+	PINMUX_DATA(SSIDATA0_MARK, PF20MD_01),
+	PINMUX_DATA(LCD_FLM_MARK, PF20MD_10),
+
+	PINMUX_DATA(PF19_DATA, PF19MD_00, PF19_IN, PF19_OUT),
+	PINMUX_DATA(SSIWS0_MARK, PF19MD_01),
+	PINMUX_DATA(LCD_M_DISP_MARK, PF19MD_10),
+
+	PINMUX_DATA(PF18_DATA, PF18MD_00, PF18_IN, PF18_OUT),
+	PINMUX_DATA(SSISCK0_MARK, PF18MD_01),
+	PINMUX_DATA(LCD_CL2_MARK, PF18MD_10),
+
+	PINMUX_DATA(PF17_DATA, PF17MD_00, PF17_IN, PF17_OUT),
+	PINMUX_DATA(FCE_MARK, PF17MD_01),
+	PINMUX_DATA(LCD_CL1_MARK, PF17MD_10),
+
+	PINMUX_DATA(PF16_DATA, PF16MD_00, PF16_IN, PF16_OUT),
+	PINMUX_DATA(FRB_MARK, PF16MD_01),
+	PINMUX_DATA(LCD_DON_MARK, PF16MD_10),
+
+	PINMUX_DATA(PF15_DATA, PF15MD_00, PF15_IN, PF15_OUT),
+	PINMUX_DATA(NAF7_MARK, PF15MD_01),
+	PINMUX_DATA(LCD_DATA15_MARK, PF15MD_10),
+
+	PINMUX_DATA(PF14_DATA, PF14MD_00, PF14_IN, PF14_OUT),
+	PINMUX_DATA(NAF6_MARK, PF14MD_01),
+	PINMUX_DATA(LCD_DATA14_MARK, PF14MD_10),
+
+	PINMUX_DATA(PF13_DATA, PF13MD_00, PF13_IN, PF13_OUT),
+	PINMUX_DATA(NAF5_MARK, PF13MD_01),
+	PINMUX_DATA(LCD_DATA13_MARK, PF13MD_10),
+
+	PINMUX_DATA(PF12_DATA, PF12MD_00, PF12_IN, PF12_OUT),
+	PINMUX_DATA(NAF4_MARK, PF12MD_01),
+	PINMUX_DATA(LCD_DATA12_MARK, PF12MD_10),
+
+	PINMUX_DATA(PF11_DATA, PF11MD_00, PF11_IN, PF11_OUT),
+	PINMUX_DATA(NAF3_MARK, PF11MD_01),
+	PINMUX_DATA(LCD_DATA11_MARK, PF11MD_10),
+
+	PINMUX_DATA(PF10_DATA, PF10MD_00, PF10_IN, PF10_OUT),
+	PINMUX_DATA(NAF2_MARK, PF10MD_01),
+	PINMUX_DATA(LCD_DATA10_MARK, PF10MD_10),
+
+	PINMUX_DATA(PF9_DATA, PF9MD_00, PF9_IN, PF9_OUT),
+	PINMUX_DATA(NAF1_MARK, PF9MD_01),
+	PINMUX_DATA(LCD_DATA9_MARK, PF9MD_10),
+
+	PINMUX_DATA(PF8_DATA, PF8MD_00, PF8_IN, PF8_OUT),
+	PINMUX_DATA(NAF0_MARK, PF8MD_01),
+	PINMUX_DATA(LCD_DATA8_MARK, PF8MD_10),
+
+	PINMUX_DATA(PF7_DATA, PF7MD_00, PF7_IN, PF7_OUT),
+	PINMUX_DATA(FSC_MARK, PF7MD_01),
+	PINMUX_DATA(LCD_DATA7_MARK, PF7MD_10),
+	PINMUX_DATA(SCS1_PF_MARK, PF7MD_11),
+
+	PINMUX_DATA(PF6_DATA, PF6MD_00, PF6_IN, PF6_OUT),
+	PINMUX_DATA(FOE_MARK, PF6MD_01),
+	PINMUX_DATA(LCD_DATA6_MARK, PF6MD_10),
+	PINMUX_DATA(SSO1_PF_MARK, PF6MD_11),
+
+	PINMUX_DATA(PF5_DATA, PF5MD_00, PF5_IN, PF5_OUT),
+	PINMUX_DATA(FCDE_MARK, PF5MD_01),
+	PINMUX_DATA(LCD_DATA5_MARK, PF5MD_10),
+	PINMUX_DATA(SSI1_PF_MARK, PF5MD_11),
+
+	PINMUX_DATA(PF4_DATA, PF4MD_00, PF4_IN, PF4_OUT),
+	PINMUX_DATA(FWE_MARK, PF4MD_01),
+	PINMUX_DATA(LCD_DATA4_MARK, PF4MD_10),
+	PINMUX_DATA(SSCK1_PF_MARK, PF4MD_11),
+
+	PINMUX_DATA(PF3_DATA, PF3MD_00, PF3_IN, PF3_OUT),
+	PINMUX_DATA(TCLKD_PF_MARK, PF3MD_01),
+	PINMUX_DATA(LCD_DATA3_MARK, PF3MD_10),
+	PINMUX_DATA(SCS0_PF_MARK, PF3MD_11),
+
+	PINMUX_DATA(PF2_DATA, PF2MD_00, PF2_IN, PF2_OUT),
+	PINMUX_DATA(TCLKC_PF_MARK, PF2MD_01),
+	PINMUX_DATA(LCD_DATA2_MARK, PF2MD_10),
+	PINMUX_DATA(SSO0_PF_MARK, PF2MD_11),
+
+	PINMUX_DATA(PF1_DATA, PF1MD_00, PF1_IN, PF1_OUT),
+	PINMUX_DATA(TCLKB_PF_MARK, PF1MD_01),
+	PINMUX_DATA(LCD_DATA1_MARK, PF1MD_10),
+	PINMUX_DATA(SSI0_PF_MARK, PF1MD_11),
+
+	PINMUX_DATA(PF0_DATA, PF0MD_00, PF0_IN, PF0_OUT),
+	PINMUX_DATA(TCLKA_PF_MARK, PF0MD_01),
+	PINMUX_DATA(LCD_DATA0_MARK, PF0MD_10),
+	PINMUX_DATA(SSCK0_PF_MARK, PF0MD_11),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+
+	/* PA */
+	PINMUX_GPIO(GPIO_PA7, PA7_DATA),
+	PINMUX_GPIO(GPIO_PA6, PA6_DATA),
+	PINMUX_GPIO(GPIO_PA5, PA5_DATA),
+	PINMUX_GPIO(GPIO_PA4, PA4_DATA),
+	PINMUX_GPIO(GPIO_PA3, PA3_DATA),
+	PINMUX_GPIO(GPIO_PA2, PA2_DATA),
+	PINMUX_GPIO(GPIO_PA1, PA1_DATA),
+	PINMUX_GPIO(GPIO_PA0, PA0_DATA),
+
+	/* PB */
+	PINMUX_GPIO(GPIO_PB12, PB12_DATA),
+	PINMUX_GPIO(GPIO_PB11, PB11_DATA),
+	PINMUX_GPIO(GPIO_PB10, PB10_DATA),
+	PINMUX_GPIO(GPIO_PB9, PB9_DATA),
+	PINMUX_GPIO(GPIO_PB8, PB8_DATA),
+	PINMUX_GPIO(GPIO_PB7, PB7_DATA),
+	PINMUX_GPIO(GPIO_PB6, PB6_DATA),
+	PINMUX_GPIO(GPIO_PB5, PB5_DATA),
+	PINMUX_GPIO(GPIO_PB4, PB4_DATA),
+	PINMUX_GPIO(GPIO_PB3, PB3_DATA),
+	PINMUX_GPIO(GPIO_PB2, PB2_DATA),
+	PINMUX_GPIO(GPIO_PB1, PB1_DATA),
+	PINMUX_GPIO(GPIO_PB0, PB0_DATA),
+
+	/* PC */
+	PINMUX_GPIO(GPIO_PC14, PC14_DATA),
+	PINMUX_GPIO(GPIO_PC13, PC13_DATA),
+	PINMUX_GPIO(GPIO_PC12, PC12_DATA),
+	PINMUX_GPIO(GPIO_PC11, PC11_DATA),
+	PINMUX_GPIO(GPIO_PC10, PC10_DATA),
+	PINMUX_GPIO(GPIO_PC9, PC9_DATA),
+	PINMUX_GPIO(GPIO_PC8, PC8_DATA),
+	PINMUX_GPIO(GPIO_PC7, PC7_DATA),
+	PINMUX_GPIO(GPIO_PC6, PC6_DATA),
+	PINMUX_GPIO(GPIO_PC5, PC5_DATA),
+	PINMUX_GPIO(GPIO_PC4, PC4_DATA),
+	PINMUX_GPIO(GPIO_PC3, PC3_DATA),
+	PINMUX_GPIO(GPIO_PC2, PC2_DATA),
+	PINMUX_GPIO(GPIO_PC1, PC1_DATA),
+	PINMUX_GPIO(GPIO_PC0, PC0_DATA),
+
+	/* PD */
+	PINMUX_GPIO(GPIO_PD15, PD15_DATA),
+	PINMUX_GPIO(GPIO_PD14, PD14_DATA),
+	PINMUX_GPIO(GPIO_PD13, PD13_DATA),
+	PINMUX_GPIO(GPIO_PD12, PD12_DATA),
+	PINMUX_GPIO(GPIO_PD11, PD11_DATA),
+	PINMUX_GPIO(GPIO_PD10, PD10_DATA),
+	PINMUX_GPIO(GPIO_PD9, PD9_DATA),
+	PINMUX_GPIO(GPIO_PD8, PD8_DATA),
+	PINMUX_GPIO(GPIO_PD7, PD7_DATA),
+	PINMUX_GPIO(GPIO_PD6, PD6_DATA),
+	PINMUX_GPIO(GPIO_PD5, PD5_DATA),
+	PINMUX_GPIO(GPIO_PD4, PD4_DATA),
+	PINMUX_GPIO(GPIO_PD3, PD3_DATA),
+	PINMUX_GPIO(GPIO_PD2, PD2_DATA),
+	PINMUX_GPIO(GPIO_PD1, PD1_DATA),
+	PINMUX_GPIO(GPIO_PD0, PD0_DATA),
+
+	/* PE */
+	PINMUX_GPIO(GPIO_PE15, PE15_DATA),
+	PINMUX_GPIO(GPIO_PE14, PE14_DATA),
+	PINMUX_GPIO(GPIO_PE13, PE13_DATA),
+	PINMUX_GPIO(GPIO_PE12, PE12_DATA),
+	PINMUX_GPIO(GPIO_PE11, PE11_DATA),
+	PINMUX_GPIO(GPIO_PE10, PE10_DATA),
+	PINMUX_GPIO(GPIO_PE9, PE9_DATA),
+	PINMUX_GPIO(GPIO_PE8, PE8_DATA),
+	PINMUX_GPIO(GPIO_PE7, PE7_DATA),
+	PINMUX_GPIO(GPIO_PE6, PE6_DATA),
+	PINMUX_GPIO(GPIO_PE5, PE5_DATA),
+	PINMUX_GPIO(GPIO_PE4, PE4_DATA),
+	PINMUX_GPIO(GPIO_PE3, PE3_DATA),
+	PINMUX_GPIO(GPIO_PE2, PE2_DATA),
+	PINMUX_GPIO(GPIO_PE1, PE1_DATA),
+	PINMUX_GPIO(GPIO_PE0, PE0_DATA),
+
+	/* PF */
+	PINMUX_GPIO(GPIO_PF30, PF30_DATA),
+	PINMUX_GPIO(GPIO_PF29, PF29_DATA),
+	PINMUX_GPIO(GPIO_PF28, PF28_DATA),
+	PINMUX_GPIO(GPIO_PF27, PF27_DATA),
+	PINMUX_GPIO(GPIO_PF26, PF26_DATA),
+	PINMUX_GPIO(GPIO_PF25, PF25_DATA),
+	PINMUX_GPIO(GPIO_PF24, PF24_DATA),
+	PINMUX_GPIO(GPIO_PF23, PF23_DATA),
+	PINMUX_GPIO(GPIO_PF22, PF22_DATA),
+	PINMUX_GPIO(GPIO_PF21, PF21_DATA),
+	PINMUX_GPIO(GPIO_PF20, PF20_DATA),
+	PINMUX_GPIO(GPIO_PF19, PF19_DATA),
+	PINMUX_GPIO(GPIO_PF18, PF18_DATA),
+	PINMUX_GPIO(GPIO_PF17, PF17_DATA),
+	PINMUX_GPIO(GPIO_PF16, PF16_DATA),
+	PINMUX_GPIO(GPIO_PF15, PF15_DATA),
+	PINMUX_GPIO(GPIO_PF14, PF14_DATA),
+	PINMUX_GPIO(GPIO_PF13, PF13_DATA),
+	PINMUX_GPIO(GPIO_PF12, PF12_DATA),
+	PINMUX_GPIO(GPIO_PF11, PF11_DATA),
+	PINMUX_GPIO(GPIO_PF10, PF10_DATA),
+	PINMUX_GPIO(GPIO_PF9, PF9_DATA),
+	PINMUX_GPIO(GPIO_PF8, PF8_DATA),
+	PINMUX_GPIO(GPIO_PF7, PF7_DATA),
+	PINMUX_GPIO(GPIO_PF6, PF6_DATA),
+	PINMUX_GPIO(GPIO_PF5, PF5_DATA),
+	PINMUX_GPIO(GPIO_PF4, PF4_DATA),
+	PINMUX_GPIO(GPIO_PF3, PF3_DATA),
+	PINMUX_GPIO(GPIO_PF2, PF2_DATA),
+	PINMUX_GPIO(GPIO_PF1, PF1_DATA),
+	PINMUX_GPIO(GPIO_PF0, PF0_DATA),
+
+	/* INTC */
+	PINMUX_GPIO(GPIO_FN_PINT7_PB, PINT7_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT6_PB, PINT6_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT5_PB, PINT5_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT4_PB, PINT4_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT3_PB, PINT3_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT2_PB, PINT2_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT1_PB, PINT1_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT0_PB, PINT0_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT7_PD, PINT7_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT6_PD, PINT6_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT5_PD, PINT5_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT4_PD, PINT4_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT3_PD, PINT3_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT2_PD, PINT2_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT1_PD, PINT1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_PINT0_PD, PINT0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PB, IRQ7_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PB, IRQ6_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PB, IRQ5_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PB, IRQ4_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PB, IRQ3_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PB, IRQ2_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PB, IRQ1_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PB, IRQ0_PB_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PD, IRQ7_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PD, IRQ6_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PD, IRQ5_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PD, IRQ4_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PD, IRQ3_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PD, IRQ2_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PD, IRQ1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PD, IRQ0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7_PE, IRQ7_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6_PE, IRQ6_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5_PE, IRQ5_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4_PE, IRQ4_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_PE, IRQ3_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_PE, IRQ2_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_PE, IRQ1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_PE, IRQ0_PE_MARK),
+
+	PINMUX_GPIO(GPIO_FN_WDTOVF, WDTOVF_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT, IRQOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_REFOUT, REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT_REFOUT, IRQOUT_REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_UBCTRG, UBCTRG_MARK),
+
+	/* CAN */
+	PINMUX_GPIO(GPIO_FN_CTX1, CTX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX1, CRX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CTX0, CTX0_MARK),
+	PINMUX_GPIO(GPIO_FN_CTX0_CTX1, CTX0_CTX1_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX0, CRX0_MARK),
+	PINMUX_GPIO(GPIO_FN_CRX0_CRX1, CRX0_CRX1_MARK),
+
+	/* IIC3 */
+	PINMUX_GPIO(GPIO_FN_SDA3, SDA3_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL3, SCL3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA2, SDA2_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL2, SCL2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA1, SDA1_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL1, SCL1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDA0, SDA0_MARK),
+	PINMUX_GPIO(GPIO_FN_SCL0, SCL0_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_TEND0_PD, TEND0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND0_PE, TEND0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0_PD, DACK0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0_PE, DACK0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0_PD, DREQ0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0_PE, DREQ0_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1_PD, TEND1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1_PE, TEND1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK1_PD, DACK1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK1_PE, DACK1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1_PD, DREQ1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1_PE, DREQ1_PE_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK2, DACK2_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ2, DREQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK3, DACK3_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ3, DREQ3_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_ADTRG_PD, ADTRG_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG_PE, ADTRG_PE_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_A21, A21_MARK),
+	PINMUX_GPIO(GPIO_FN_CS4, CS4_MARK),
+	PINMUX_GPIO(GPIO_FN_MRES, MRES_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_CS1, CS1_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6_CE1B, CS6_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CE2B, CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5_CE1A, CS5_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CE2A, CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_FRAME, FRAME_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_RDWR, RDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_CKE, CKE_MARK),
+	PINMUX_GPIO(GPIO_FN_CASU, CASU_MARK),
+	PINMUX_GPIO(GPIO_FN_BREQ, BREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_RASU, RASU_MARK),
+	PINMUX_GPIO(GPIO_FN_BACK, BACK_MARK),
+	PINMUX_GPIO(GPIO_FN_CASL, CASL_MARK),
+	PINMUX_GPIO(GPIO_FN_RASL, RASL_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_DQMUU_AH_ICIO_WR, WE3_DQMUU_AH_ICIO_WR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_DQMUL_ICIORD, WE2_DQMUL_ICIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_WE1_DQMLU_WE, WE1_DQMLU_WE_MARK),
+	PINMUX_GPIO(GPIO_FN_WE0_DQMLL, WE0_DQMLL_MARK),
+	PINMUX_GPIO(GPIO_FN_CS3, CS3_MARK),
+	PINMUX_GPIO(GPIO_FN_CS2, CS2_MARK),
+	PINMUX_GPIO(GPIO_FN_A1, A1_MARK),
+	PINMUX_GPIO(GPIO_FN_A0, A0_MARK),
+	PINMUX_GPIO(GPIO_FN_CS7, CS7_MARK),
+
+	/* MTU2 */
+	PINMUX_GPIO(GPIO_FN_TIOC4D, TIOC4D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4C, TIOC4C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4B, TIOC4B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC4A, TIOC4A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3D, TIOC3D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3C, TIOC3C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3B, TIOC3B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC3A, TIOC3A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC2B, TIOC2B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC1B, TIOC1B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC2A, TIOC2A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC1A, TIOC1A_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0D, TIOC0D_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0C, TIOC0C_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0B, TIOC0B_MARK),
+	PINMUX_GPIO(GPIO_FN_TIOC0A, TIOC0A_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKD_PD, TCLKD_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKC_PD, TCLKC_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKB_PD, TCLKB_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKA_PD, TCLKA_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKD_PF, TCLKD_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKC_PF, TCLKC_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKB_PF, TCLKB_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_TCLKA_PF, TCLKA_PF_MARK),
+
+	/* SSU */
+	PINMUX_GPIO(GPIO_FN_SCS0_PD, SCS0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO0_PD, SSO0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI0_PD, SSI0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK0_PD, SSCK0_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS0_PF, SCS0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO0_PF, SSO0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI0_PF, SSI0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK0_PF, SSCK0_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS1_PD, SCS1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO1_PD, SSO1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI1_PD, SSI1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK1_PD, SSCK1_PD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCS1_PF, SCS1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSO1_PF, SSO1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSI1_PF, SSI1_PF_MARK),
+	PINMUX_GPIO(GPIO_FN_SSCK1_PF, SSCK1_PF_MARK),
+
+	/* SCIF */
+	PINMUX_GPIO(GPIO_FN_TXD0, TXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD0, RXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK0, SCK0_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD1, TXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD1, RXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK1, SCK1_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD2, TXD2_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD2, RXD2_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK2, SCK2_MARK),
+	PINMUX_GPIO(GPIO_FN_RTS3, RTS3_MARK),
+	PINMUX_GPIO(GPIO_FN_CTS3, CTS3_MARK),
+	PINMUX_GPIO(GPIO_FN_TXD3, TXD3_MARK),
+	PINMUX_GPIO(GPIO_FN_RXD3, RXD3_MARK),
+	PINMUX_GPIO(GPIO_FN_SCK3, SCK3_MARK),
+
+	/* SSI */
+	PINMUX_GPIO(GPIO_FN_AUDIO_CLK, AUDIO_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA3, SSIDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS3, SSIWS3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK3, SSISCK3_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA2, SSIDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS2, SSIWS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK2, SSISCK2_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA1, SSIDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS1, SSIWS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK1, SSISCK1_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIDATA0, SSIDATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_SSIWS0, SSIWS0_MARK),
+	PINMUX_GPIO(GPIO_FN_SSISCK0, SSISCK0_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCD_VEPWC, LCD_VEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VCPWC, LCD_VCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CLK, LCD_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_FLM, LCD_FLM_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_M_DISP, LCD_M_DISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL2, LCD_CL2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL1, LCD_CL1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DON, LCD_DON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA15, LCD_DATA15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA14, LCD_DATA14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA13, LCD_DATA13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA12, LCD_DATA12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA11, LCD_DATA11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA10, LCD_DATA10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA9, LCD_DATA9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA8, LCD_DATA8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA7, LCD_DATA7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA6, LCD_DATA6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA5, LCD_DATA5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA4, LCD_DATA4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA3, LCD_DATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA2, LCD_DATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA1, LCD_DATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA0, LCD_DATA0_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = { /* config register table referenced by sh7203_pinmux_info.cfg_regs */
+	{ PINMUX_CFG_REG("PBIORL", 0xfffe3886, 16, 1) { /* (16, 1) entries: 16 IN/OUT id pairs, highest pin first; 0 = no id */
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PB11_IN, PB11_OUT,
+		PB10_IN, PB10_OUT,
+		PB9_IN, PB9_OUT,
+		PB8_IN, PB8_OUT,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL4", 0xfffe3890, 16, 4) { /* (16, 4) entries: 4 groups of 16 ids each */
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		/* within a group, slot k holds the id whose suffix is binary k; 0 = unused slot */
+		PB12MD_00, PB12MD_01, PB12MD_10, PB12MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL3", 0xfffe3892, 16, 4) { /* one group per pin, highest pin first */
+		PB11MD_0, PB11MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB10MD_0, PB10MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB9MD_00, PB9MD_01, PB9MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB8MD_00, PB8MD_01, PB8MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL2", 0xfffe3894, 16, 4) {
+		PB7MD_00, PB7MD_01, PB7MD_10, PB7MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB6MD_00, PB6MD_01, PB6MD_10, PB6MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB5MD_00, PB5MD_01, PB5MD_10, PB5MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB4MD_00, PB4MD_01, PB4MD_10, PB4MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PBCRL1", 0xfffe3896, 16, 4) {
+		PB3MD_00, PB3MD_01, PB3MD_10, PB3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB2MD_00, PB2MD_01, PB2MD_10, PB2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB1MD_00, PB1MD_01, PB1MD_10, PB1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB0MD_00, PB0MD_01, PB0MD_10, PB0MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("IFCR", 0xfffe38a2, 16, 4) { /* only the last group is populated (PB12IRQ ids) */
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PB12IRQ_00, PB12IRQ_01, PB12IRQ_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCIORL", 0xfffe3906, 16, 1) {
+		0, 0,
+		PC14_IN, PC14_OUT,
+		PC13_IN, PC13_OUT,
+		PC12_IN, PC12_OUT,
+		PC11_IN, PC11_OUT,
+		PC10_IN, PC10_OUT,
+		PC9_IN, PC9_OUT,
+		PC8_IN, PC8_OUT,
+		PC7_IN, PC7_OUT,
+		PC6_IN, PC6_OUT,
+		PC5_IN, PC5_OUT,
+		PC4_IN, PC4_OUT,
+		PC3_IN, PC3_OUT,
+		PC2_IN, PC2_OUT,
+		PC1_IN, PC1_OUT,
+		PC0_IN, PC0_OUT }
+	},
+	{ PINMUX_CFG_REG("PCCRL4", 0xfffe3910, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC14MD_0, PC14MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC13MD_0, PC13MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC12MD_0, PC12MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL3", 0xfffe3912, 16, 4) {
+		PC11MD_00, PC11MD_01, PC11MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC10MD_00, PC10MD_01, PC10MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC9MD_0, PC9MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC8MD_0, PC8MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL2", 0xfffe3914, 16, 4) {
+		PC7MD_0, PC7MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC6MD_0, PC6MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC5MD_0, PC5MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC4MD_0, PC4MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PCCRL1", 0xfffe3916, 16, 4) {
+		PC3MD_0, PC3MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC2MD_0, PC2MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC1MD_0, PC1MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PC0MD_00, PC0MD_01, PC0MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDIORL", 0xfffe3986, 16, 1) {
+		PD15_IN, PD15_OUT,
+		PD14_IN, PD14_OUT,
+		PD13_IN, PD13_OUT,
+		PD12_IN, PD12_OUT,
+		PD11_IN, PD11_OUT,
+		PD10_IN, PD10_OUT,
+		PD9_IN, PD9_OUT,
+		PD8_IN, PD8_OUT,
+		PD7_IN, PD7_OUT,
+		PD6_IN, PD6_OUT,
+		PD5_IN, PD5_OUT,
+		PD4_IN, PD4_OUT,
+		PD3_IN, PD3_OUT,
+		PD2_IN, PD2_OUT,
+		PD1_IN, PD1_OUT,
+		PD0_IN, PD0_OUT }
+	},
+	{ PINMUX_CFG_REG("PDCRL4", 0xfffe3990, 16, 4) {
+		PD15MD_000, PD15MD_001, PD15MD_010, 0,
+		PD15MD_100, PD15MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD14MD_000, PD14MD_001, PD14MD_010, 0,
+		0, PD14MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD13MD_000, PD13MD_001, PD13MD_010, 0,
+		PD13MD_100, PD13MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD12MD_000, PD12MD_001, PD12MD_010, 0,
+		PD12MD_100, PD12MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL3", 0xfffe3992, 16, 4) {
+		PD11MD_000, PD11MD_001, PD11MD_010, 0,
+		PD11MD_100, PD11MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD10MD_000, PD10MD_001, PD10MD_010, 0,
+		PD10MD_100, PD10MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD9MD_000, PD9MD_001, PD9MD_010, 0,
+		PD9MD_100, PD9MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD8MD_000, PD8MD_001, PD8MD_010, 0,
+		PD8MD_100, PD8MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL2", 0xfffe3994, 16, 4) {
+		PD7MD_000, PD7MD_001, PD7MD_010, PD7MD_011,
+		PD7MD_100, PD7MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD6MD_000, PD6MD_001, PD6MD_010, PD6MD_011,
+		PD6MD_100, PD6MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD5MD_000, PD5MD_001, PD5MD_010, PD5MD_011,
+		PD5MD_100, PD5MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD4MD_000, PD4MD_001, PD4MD_010, PD4MD_011,
+		PD4MD_100, PD4MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PDCRL1", 0xfffe3996, 16, 4) {
+		PD3MD_000, PD3MD_001, PD3MD_010, PD3MD_011,
+		PD3MD_100, PD3MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD2MD_000, PD2MD_001, PD2MD_010, PD2MD_011,
+		PD2MD_100, PD2MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD1MD_000, PD1MD_001, PD1MD_010, PD1MD_011,
+		PD1MD_100, PD1MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PD0MD_000, PD0MD_001, PD0MD_010, PD0MD_011,
+		PD0MD_100, PD0MD_101, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PEIORL", 0xfffe3a06, 16, 1) {
+		PE15_IN, PE15_OUT,
+		PE14_IN, PE14_OUT,
+		PE13_IN, PE13_OUT,
+		PE12_IN, PE12_OUT,
+		PE11_IN, PE11_OUT,
+		PE10_IN, PE10_OUT,
+		PE9_IN, PE9_OUT,
+		PE8_IN, PE8_OUT,
+		PE7_IN, PE7_OUT,
+		PE6_IN, PE6_OUT,
+		PE5_IN, PE5_OUT,
+		PE4_IN, PE4_OUT,
+		PE3_IN, PE3_OUT,
+		PE2_IN, PE2_OUT,
+		PE1_IN, PE1_OUT,
+		PE0_IN, PE0_OUT }
+	},
+	{ PINMUX_CFG_REG("PECRL4", 0xfffe3a10, 16, 4) {
+		PE15MD_00, PE15MD_01, 0, PE15MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE14MD_00, PE14MD_01, 0, PE14MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE13MD_00, 0, 0, PE13MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE12MD_00, 0, 0, PE12MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL3", 0xfffe3a12, 16, 4) {
+		PE11MD_000, PE11MD_001, PE11MD_010, 0,
+		PE11MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE10MD_000, PE10MD_001, PE10MD_010, 0,
+		PE10MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE9MD_00, PE9MD_01, PE9MD_10, PE9MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE8MD_00, PE8MD_01, PE8MD_10, PE8MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL2", 0xfffe3a14, 16, 4) {
+		PE7MD_000, PE7MD_001, PE7MD_010, PE7MD_011,
+		PE7MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE6MD_000, PE6MD_001, PE6MD_010, PE6MD_011,
+		PE6MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE5MD_000, PE5MD_001, PE5MD_010, PE5MD_011,
+		PE5MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE4MD_000, PE4MD_001, PE4MD_010, PE4MD_011,
+		PE4MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECRL1", 0xfffe3a16, 16, 4) {
+		PE3MD_00, PE3MD_01, 0, PE3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE2MD_00, PE2MD_01, 0, PE2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE1MD_00, PE1MD_01, PE1MD_10, PE1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PE0MD_000, PE0MD_001, 0, PE0MD_011,
+		PE0MD_100, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFIORH", 0xfffe3a84, 16, 1) { /* port F is split over two entries: PF30..PF16 here, PF15..PF0 in PFIORL */
+		0, 0,
+		PF30_IN, PF30_OUT,
+		PF29_IN, PF29_OUT,
+		PF28_IN, PF28_OUT,
+		PF27_IN, PF27_OUT,
+		PF26_IN, PF26_OUT,
+		PF25_IN, PF25_OUT,
+		PF24_IN, PF24_OUT,
+		PF23_IN, PF23_OUT,
+		PF22_IN, PF22_OUT,
+		PF21_IN, PF21_OUT,
+		PF20_IN, PF20_OUT,
+		PF19_IN, PF19_OUT,
+		PF18_IN, PF18_OUT,
+		PF17_IN, PF17_OUT,
+		PF16_IN, PF16_OUT }
+	},
+	{ PINMUX_CFG_REG("PFIORL", 0xfffe3a86, 16, 1) {
+		PF15_IN, PF15_OUT,
+		PF14_IN, PF14_OUT,
+		PF13_IN, PF13_OUT,
+		PF12_IN, PF12_OUT,
+		PF11_IN, PF11_OUT,
+		PF10_IN, PF10_OUT,
+		PF9_IN, PF9_OUT,
+		PF8_IN, PF8_OUT,
+		PF7_IN, PF7_OUT,
+		PF6_IN, PF6_OUT,
+		PF5_IN, PF5_OUT,
+		PF4_IN, PF4_OUT,
+		PF3_IN, PF3_OUT,
+		PF2_IN, PF2_OUT,
+		PF1_IN, PF1_OUT,
+		PF0_IN, PF0_OUT }
+	},
+	{ PINMUX_CFG_REG("PFCRH4", 0xfffe3a88, 16, 4) {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF30MD_0, PF30MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF29MD_0, PF29MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF28MD_0, PF28MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH3", 0xfffe3a8a, 16, 4) {
+		PF27MD_0, PF27MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF26MD_0, PF26MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF25MD_0, PF25MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF24MD_0, PF24MD_1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH2", 0xfffe3a8c, 16, 4) {
+		PF23MD_00, PF23MD_01, PF23MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF22MD_00, PF22MD_01, PF22MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF21MD_00, PF21MD_01, PF21MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF20MD_00, PF20MD_01, PF20MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRH1", 0xfffe3a8e, 16, 4) {
+		PF19MD_00, PF19MD_01, PF19MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF18MD_00, PF18MD_01, PF18MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF17MD_00, PF17MD_01, PF17MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF16MD_00, PF16MD_01, PF16MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL4", 0xfffe3a90, 16, 4) {
+		PF15MD_00, PF15MD_01, PF15MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF14MD_00, PF14MD_01, PF14MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF13MD_00, PF13MD_01, PF13MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF12MD_00, PF12MD_01, PF12MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL3", 0xfffe3a92, 16, 4) {
+		PF11MD_00, PF11MD_01, PF11MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF10MD_00, PF10MD_01, PF10MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF9MD_00, PF9MD_01, PF9MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF8MD_00, PF8MD_01, PF8MD_10, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL2", 0xfffe3a94, 16, 4) {
+		PF7MD_00, PF7MD_01, PF7MD_10, PF7MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF6MD_00, PF6MD_01, PF6MD_10, PF6MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF5MD_00, PF5MD_01, PF5MD_10, PF5MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF4MD_00, PF4MD_01, PF4MD_10, PF4MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PFCRL1", 0xfffe3a96, 16, 4) {
+		PF3MD_00, PF3MD_01, PF3MD_10, PF3MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF2MD_00, PF2MD_01, PF2MD_10, PF2MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF1MD_00, PF1MD_01, PF1MD_10, PF1MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+		PF0MD_00, PF0MD_01, PF0MD_10, PF0MD_11,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+	},
+	{} /* empty final entry — presumably the end-of-table marker for the pinmux core; confirm */
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = { /* port data registers: 16 ids per entry, highest pin first */
+	{ PINMUX_DATA_REG("PADRL", 0xfffe3802, 16) {
+		0, 0, 0, 0,	/* 0 = slot without a pin id */
+		0, 0, 0, 0,
+		PA7_DATA, PA6_DATA, PA5_DATA, PA4_DATA,
+		PA3_DATA, PA2_DATA, PA1_DATA, PA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDRL", 0xfffe3882, 16) {
+		0, 0, 0, PB12_DATA,
+		PB11_DATA, PB10_DATA, PB9_DATA, PB8_DATA,
+		PB7_DATA, PB6_DATA, PB5_DATA, PB4_DATA,
+		PB3_DATA, PB2_DATA, PB1_DATA, PB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDRL", 0xfffe3902, 16) {
+		0, PC14_DATA, PC13_DATA, PC12_DATA,
+		PC11_DATA, PC10_DATA, PC9_DATA, PC8_DATA,
+		PC7_DATA, PC6_DATA, PC5_DATA, PC4_DATA,
+		PC3_DATA, PC2_DATA, PC1_DATA, PC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDRL", 0xfffe3982, 16) {
+		PD15_DATA, PD14_DATA, PD13_DATA, PD12_DATA,
+		PD11_DATA, PD10_DATA, PD9_DATA, PD8_DATA,
+		PD7_DATA, PD6_DATA, PD5_DATA, PD4_DATA,
+		PD3_DATA, PD2_DATA, PD1_DATA, PD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDRL", 0xfffe3a02, 16) {
+		PE15_DATA, PE14_DATA, PE13_DATA, PE12_DATA,
+		PE11_DATA, PE10_DATA, PE9_DATA, PE8_DATA,
+		PE7_DATA, PE6_DATA, PE5_DATA, PE4_DATA,
+		PE3_DATA, PE2_DATA, PE1_DATA, PE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDRH", 0xfffe3a80, 16) { /* port F is split: PF30..PF16 here, PF15..PF0 in PFDRL */
+		0, PF30_DATA, PF29_DATA, PF28_DATA,
+		PF27_DATA, PF26_DATA, PF25_DATA, PF24_DATA,
+		PF23_DATA, PF22_DATA, PF21_DATA, PF20_DATA,
+		PF19_DATA, PF18_DATA, PF17_DATA, PF16_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDRL", 0xfffe3a82, 16) {
+		PF15_DATA, PF14_DATA, PF13_DATA, PF12_DATA,
+		PF11_DATA, PF10_DATA, PF9_DATA, PF8_DATA,
+		PF7_DATA, PF6_DATA, PF5_DATA, PF4_DATA,
+		PF3_DATA, PF2_DATA, PF1_DATA, PF0_DATA }
+	},
+	{ },	/* empty final entry — presumably the end-of-table marker; confirm */
+};
+
+static struct pinmux_info sh7203_pinmux_info = { /* description handed to register_pinmux() by plat_pinmux_setup() */
+	.name = "sh7203_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END }, /* this and the next four: BEGIN/END marker pair per id class */
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+	/* GPIO number range; last_gpio matches the final PINMUX_GPIO() entry listed above */
+	.first_gpio = GPIO_PA7,
+	.last_gpio = GPIO_FN_LCD_DATA0,
+	/* lookup tables defined earlier in this file */
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+	/* pinmux_data array together with its element count */
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void) /* init hook: registers sh7203_pinmux_info */
+{
+	return register_pinmux(&sh7203_pinmux_info); /* register_pinmux()'s result is returned unchanged */
+}
+/* hook plat_pinmux_setup() in via arch_initcall() */
+arch_initcall(plat_pinmux_setup);
diff --git a/arch/sh/kernel/cpu/sh3/Makefile b/arch/sh/kernel/cpu/sh3/Makefile
index 511de55af83..e07c69e16d9 100644
--- a/arch/sh/kernel/cpu/sh3/Makefile
+++ b/arch/sh/kernel/cpu/sh3/Makefile
@@ -24,4 +24,8 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7710)	:= clock-sh7710.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7720)	:= clock-sh7710.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7712)	:= clock-sh7712.o
 
+# Pinmux setup
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7720)	:= pinmux-sh7720.o
+
 obj-y	+= $(clock-y)
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
new file mode 100644
index 00000000000..9ca15462714
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh3/pinmux-sh7720.c
@@ -0,0 +1,1242 @@
+/*
+ * SH7720 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <cpu/sh7720.h>
+
+enum { /* ids used by the SH7720 pinmux tables; *_BEGIN/*_END markers bracket each id class */
+	PINMUX_RESERVED = 0,
+	/* one *_DATA id per port pin */
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE6_DATA, PTE5_DATA, PTE4_DATA,
+	PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG6_DATA, PTG5_DATA, PTG4_DATA,
+	PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ6_DATA, PTJ5_DATA, PTJ4_DATA,
+	PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA, PTL3_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTP4_DATA, PTP3_DATA, PTP2_DATA, PTP1_DATA, PTP0_DATA,
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS4_DATA, PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV4_DATA, PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PINMUX_DATA_END,
+	/* one *_IN id per port pin (same pin list as the DATA range) */
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE6_IN, PTE5_IN, PTE4_IN,
+	PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTG6_IN, PTG5_IN, PTG4_IN,
+	PTG3_IN, PTG2_IN, PTG1_IN, PTG0_IN,
+	PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ6_IN, PTJ5_IN, PTJ4_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN, PTL3_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTP4_IN, PTP3_IN, PTP2_IN, PTP1_IN, PTP0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+	PTS4_IN, PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV4_IN, PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PINMUX_INPUT_END,
+	/* pull-up input ids; PTE6, PTE5 and PTF6..PTF1 have none */
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTA7_IN_PU, PTA6_IN_PU, PTA5_IN_PU, PTA4_IN_PU,
+	PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB7_IN_PU, PTB6_IN_PU, PTB5_IN_PU, PTB4_IN_PU,
+	PTB3_IN_PU, PTB2_IN_PU, PTB1_IN_PU, PTB0_IN_PU,
+	PTC7_IN_PU, PTC6_IN_PU, PTC5_IN_PU, PTC4_IN_PU,
+	PTC3_IN_PU, PTC2_IN_PU, PTC1_IN_PU, PTC0_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU, PTD0_IN_PU,
+	PTE4_IN_PU, PTE3_IN_PU, PTE2_IN_PU, PTE1_IN_PU, PTE0_IN_PU,
+	PTF0_IN_PU,
+	PTG6_IN_PU, PTG5_IN_PU, PTG4_IN_PU,
+	PTG3_IN_PU, PTG2_IN_PU, PTG1_IN_PU, PTG0_IN_PU,
+	PTH6_IN_PU, PTH5_IN_PU, PTH4_IN_PU,
+	PTH3_IN_PU, PTH2_IN_PU, PTH1_IN_PU, PTH0_IN_PU,
+	PTJ6_IN_PU, PTJ5_IN_PU, PTJ4_IN_PU,
+	PTJ3_IN_PU, PTJ2_IN_PU, PTJ1_IN_PU, PTJ0_IN_PU,
+	PTK3_IN_PU, PTK2_IN_PU, PTK1_IN_PU, PTK0_IN_PU,
+	PTL7_IN_PU, PTL6_IN_PU, PTL5_IN_PU, PTL4_IN_PU, PTL3_IN_PU,
+	PTM7_IN_PU, PTM6_IN_PU, PTM5_IN_PU, PTM4_IN_PU,
+	PTM3_IN_PU, PTM2_IN_PU, PTM1_IN_PU, PTM0_IN_PU,
+	PTP4_IN_PU, PTP3_IN_PU, PTP2_IN_PU, PTP1_IN_PU, PTP0_IN_PU,
+	PTR7_IN_PU, PTR6_IN_PU, PTR5_IN_PU, PTR4_IN_PU,
+	PTR3_IN_PU, PTR2_IN_PU, PTR1_IN_PU, PTR0_IN_PU,
+	PTS4_IN_PU, PTS3_IN_PU, PTS2_IN_PU, PTS1_IN_PU, PTS0_IN_PU,
+	PTT4_IN_PU, PTT3_IN_PU, PTT2_IN_PU, PTT1_IN_PU, PTT0_IN_PU,
+	PTU4_IN_PU, PTU3_IN_PU, PTU2_IN_PU, PTU1_IN_PU, PTU0_IN_PU,
+	PTV4_IN_PU, PTV3_IN_PU, PTV2_IN_PU, PTV1_IN_PU, PTV0_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+	/* output ids; same pin subset as the pull-up range */
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE4_OUT, PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF0_OUT,
+	PTG6_OUT, PTG5_OUT, PTG4_OUT,
+	PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ6_OUT, PTJ5_OUT, PTJ4_OUT,
+	PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT, PTL3_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTP4_OUT, PTP3_OUT, PTP2_OUT, PTP1_OUT, PTP0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+	PTR3_OUT, PTR2_OUT, PTR1_OUT, PTR0_OUT,
+	PTS4_OUT, PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV4_OUT, PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PINMUX_OUTPUT_END,
+	/* per-pin *_FN ids, followed by the PSELx ids */
+	PINMUX_FUNCTION_BEGIN,
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE6_FN, PTE5_FN, PTE4_FN,
+	PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+	PTG6_FN, PTG5_FN, PTG4_FN,
+	PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ6_FN, PTJ5_FN, PTJ4_FN,
+	PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN, PTL3_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTP4_FN, PTP3_FN, PTP2_FN, PTP1_FN, PTP0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+	PTS4_FN, PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT4_FN, PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU4_FN, PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV4_FN, PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+	/* PSELx_<hi>_<lo>_<value> — presumably bits hi:lo of register PSELx set to <value>; confirm */
+	PSELA_1_0_00, PSELA_1_0_01, PSELA_1_0_10,
+	PSELA_3_2_00, PSELA_3_2_01, PSELA_3_2_10, PSELA_3_2_11,
+	PSELA_5_4_00, PSELA_5_4_01, PSELA_5_4_10, PSELA_5_4_11,
+	PSELA_7_6_00, PSELA_7_6_01, PSELA_7_6_10,
+	PSELA_9_8_00, PSELA_9_8_01, PSELA_9_8_10,
+	PSELA_11_10_00, PSELA_11_10_01, PSELA_11_10_10,
+	PSELA_13_12_00, PSELA_13_12_10,
+	PSELA_15_14_00, PSELA_15_14_10,
+	PSELB_9_8_00, PSELB_9_8_11,
+	PSELB_11_10_00, PSELB_11_10_01, PSELB_11_10_10, PSELB_11_10_11,
+	PSELB_13_12_00, PSELB_13_12_01, PSELB_13_12_10, PSELB_13_12_11,
+	PSELB_15_14_00, PSELB_15_14_11,
+	PSELC_9_8_00, PSELC_9_8_10,
+	PSELC_11_10_00, PSELC_11_10_10,
+	PSELC_13_12_00,	PSELC_13_12_01,	PSELC_13_12_10,
+	PSELC_15_14_00,	PSELC_15_14_01,	PSELC_15_14_10,
+	PSELD_1_0_00, PSELD_1_0_10,
+	PSELD_11_10_00,	PSELD_11_10_01,
+	PSELD_15_14_00,	PSELD_15_14_01,	PSELD_15_14_10,
+	PINMUX_FUNCTION_END,
+	/* one *_MARK id per named pin function */
+	PINMUX_MARK_BEGIN,
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	IOIS16_MARK, RAS_MARK, CAS_MARK, CKE_MARK,
+	CS5B_CE1A_MARK, CS6B_CE1B_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	A21_MARK, A20_MARK, A19_MARK, A0_MARK,
+	REFOUT_MARK, IRQOUT_MARK,
+	LCD_DATA15_MARK, LCD_DATA14_MARK,
+	LCD_DATA13_MARK, LCD_DATA12_MARK,
+	LCD_DATA11_MARK, LCD_DATA10_MARK,
+	LCD_DATA9_MARK, LCD_DATA8_MARK,
+	LCD_DATA7_MARK, LCD_DATA6_MARK,
+	LCD_DATA5_MARK, LCD_DATA4_MARK,
+	LCD_DATA3_MARK, LCD_DATA2_MARK,
+	LCD_DATA1_MARK, LCD_DATA0_MARK,
+	LCD_M_DISP_MARK,
+	LCD_CL1_MARK, LCD_CL2_MARK,
+	LCD_DON_MARK, LCD_FLM_MARK,
+	LCD_VEPWC_MARK, LCD_VCPWC_MARK,
+	AFE_RXIN_MARK, AFE_RDET_MARK,
+	AFE_FS_MARK, AFE_TXOUT_MARK,
+	AFE_SCLK_MARK, AFE_RLYCNT_MARK,
+	AFE_HC1_MARK,
+	IIC_SCL_MARK, IIC_SDA_MARK,
+	DA1_MARK, DA0_MARK,
+	AN3_MARK, AN2_MARK, AN1_MARK, AN0_MARK, ADTRG_MARK,
+	USB1D_RCV_MARK, USB1D_TXSE0_MARK,
+	USB1D_TXDPLS_MARK, USB1D_DMNS_MARK,
+	USB1D_DPLS_MARK, USB1D_SPEED_MARK,
+	USB1D_TXENL_MARK,
+	USB2_PWR_EN_MARK, USB1_PWR_EN_USBF_UPLUP_MARK, USB1D_SUSPEND_MARK,
+	IRQ5_MARK, IRQ4_MARK,
+	IRQ3_IRL3_MARK, IRQ2_IRL2_MARK,
+	IRQ1_IRL1_MARK, IRQ0_IRL0_MARK,
+	PCC_REG_MARK, PCC_DRV_MARK,
+	PCC_BVD2_MARK, PCC_BVD1_MARK,
+	PCC_CD2_MARK, PCC_CD1_MARK,
+	PCC_RESET_MARK, PCC_RDY_MARK,
+	PCC_VS2_MARK, PCC_VS1_MARK,
+	AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK, AUDATA0_MARK,
+	AUDCK_MARK, AUDSYNC_MARK, ASEBRKAK_MARK, TRST_MARK,
+	TMS_MARK, TDO_MARK, TDI_MARK, TCK_MARK,
+	DACK1_MARK, DREQ1_MARK, DACK0_MARK, DREQ0_MARK,
+	TEND1_MARK, TEND0_MARK,
+	SIOF0_SYNC_MARK, SIOF0_MCLK_MARK,
+	SIOF0_TXD_MARK, SIOF0_RXD_MARK,
+	SIOF0_SCK_MARK,
+	SIOF1_SYNC_MARK, SIOF1_MCLK_MARK,
+	SIOF1_TXD_MARK, SIOF1_RXD_MARK,
+	SIOF1_SCK_MARK,
+	SCIF0_TXD_MARK, SCIF0_RXD_MARK,
+	SCIF0_RTS_MARK, SCIF0_CTS_MARK, SCIF0_SCK_MARK,
+	SCIF1_TXD_MARK, SCIF1_RXD_MARK,
+	SCIF1_RTS_MARK, SCIF1_CTS_MARK, SCIF1_SCK_MARK,
+	TPU_TO1_MARK, TPU_TO0_MARK,
+	TPU_TI3B_MARK, TPU_TI3A_MARK,
+	TPU_TI2B_MARK, TPU_TI2A_MARK,
+	TPU_TO3_MARK, TPU_TO2_MARK,
+	SIM_D_MARK, SIM_CLK_MARK, SIM_RST_MARK,
+	MMC_DAT_MARK, MMC_CMD_MARK,
+	MMC_CLK_MARK, MMC_VDDON_MARK,
+	MMC_ODMOD_MARK,
+	STATUS0_MARK, STATUS1_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO: port data ID, then the IN/OUT/IN_PU IDs given for it */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT, PTA7_IN_PU),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT, PTA6_IN_PU),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT, PTA5_IN_PU),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT, PTB7_IN_PU),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT, PTB6_IN_PU),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT, PTB5_IN_PU),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT, PTB4_IN_PU),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT, PTB3_IN_PU),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT, PTB0_IN_PU),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT, PTC7_IN_PU),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT, PTC6_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT, PTC4_IN_PU),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT, PTC3_IN_PU),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT, PTC2_IN_PU),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT, PTC1_IN_PU),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT, PTC0_IN_PU),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT, PTD0_IN_PU),
+
+	/* PTE GPIO (PTE6 and PTE5 are listed with an input mode only) */
+	PINMUX_DATA(PTE6_DATA, PTE6_IN),
+	PINMUX_DATA(PTE5_DATA, PTE5_IN),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT, PTE4_IN_PU),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT, PTE3_IN_PU),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT, PTE2_IN_PU),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT, PTE1_IN_PU),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT, PTE0_IN_PU),
+
+	/* PTF GPIO; NOTE(review): PFCR has no OUT/IN_PU entry for PTF0 */
+	PINMUX_DATA(PTF6_DATA, PTF6_IN),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT, PTF0_IN_PU),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG6_DATA, PTG6_IN, PTG6_OUT, PTG6_IN_PU),
+	PINMUX_DATA(PTG5_DATA, PTG5_IN, PTG5_OUT, PTG5_IN_PU),
+	PINMUX_DATA(PTG4_DATA, PTG4_IN, PTG4_OUT, PTG4_IN_PU),
+	PINMUX_DATA(PTG3_DATA, PTG3_IN, PTG3_OUT, PTG3_IN_PU),
+	PINMUX_DATA(PTG2_DATA, PTG2_IN, PTG2_OUT, PTG2_IN_PU),
+	PINMUX_DATA(PTG1_DATA, PTG1_IN, PTG1_OUT, PTG1_IN_PU),
+	PINMUX_DATA(PTG0_DATA, PTG0_IN, PTG0_OUT, PTG0_IN_PU),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT, PTH6_IN_PU),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT, PTH5_IN_PU),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT, PTH4_IN_PU),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT, PTH3_IN_PU),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT, PTH2_IN_PU),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT, PTH1_IN_PU),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT, PTH0_IN_PU),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ6_DATA, PTJ6_IN, PTJ6_OUT, PTJ6_IN_PU),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_IN, PTJ5_OUT, PTJ5_IN_PU),
+	PINMUX_DATA(PTJ4_DATA, PTJ4_IN, PTJ4_OUT, PTJ4_IN_PU),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT, PTJ3_IN_PU),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT, PTJ2_IN_PU),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT, PTJ0_IN_PU),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT, PTK3_IN_PU),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT, PTK2_IN_PU),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT, PTK1_IN_PU),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT, PTK0_IN_PU),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT, PTL7_IN_PU),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT, PTL6_IN_PU),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT, PTL5_IN_PU),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT, PTL4_IN_PU),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT, PTL3_IN_PU),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT, PTM7_IN_PU),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT, PTM6_IN_PU),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT, PTM5_IN_PU),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT, PTM4_IN_PU),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT, PTM3_IN_PU),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT, PTM2_IN_PU),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT, PTM1_IN_PU),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT, PTM0_IN_PU),
+
+	/* PTP GPIO */
+	PINMUX_DATA(PTP4_DATA, PTP4_IN, PTP4_OUT, PTP4_IN_PU),
+	PINMUX_DATA(PTP3_DATA, PTP3_IN, PTP3_OUT, PTP3_IN_PU),
+	PINMUX_DATA(PTP2_DATA, PTP2_IN, PTP2_OUT, PTP2_IN_PU),
+	PINMUX_DATA(PTP1_DATA, PTP1_IN, PTP1_OUT, PTP1_IN_PU),
+	PINMUX_DATA(PTP0_DATA, PTP0_IN, PTP0_OUT, PTP0_IN_PU),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT, PTR7_IN_PU),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT, PTR6_IN_PU),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT, PTR5_IN_PU),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT, PTR4_IN_PU),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN, PTR3_OUT, PTR3_IN_PU),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_OUT, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT, PTR1_IN_PU),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT, PTR0_IN_PU),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT, PTS4_IN_PU),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT, PTS3_IN_PU),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT, PTS2_IN_PU),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT, PTS1_IN_PU),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT, PTS0_IN_PU),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT, PTT4_IN_PU),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT, PTT3_IN_PU),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT, PTT2_IN_PU),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT, PTT1_IN_PU),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT, PTT0_IN_PU),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT, PTU4_IN_PU),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT, PTU3_IN_PU),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT, PTU2_IN_PU),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT, PTU1_IN_PU),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT, PTU0_IN_PU),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT, PTV4_IN_PU),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT, PTV3_IN_PU),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT, PTV2_IN_PU),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT, PTV1_IN_PU),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT, PTV0_IN_PU),
+
+	/* PTA FN: function mark, then [a PSELx value and] the pin's FN ID */
+	PINMUX_DATA(D23_MARK, PTA7_FN),
+	PINMUX_DATA(D22_MARK, PTA6_FN),
+	PINMUX_DATA(D21_MARK, PTA5_FN),
+	PINMUX_DATA(D20_MARK, PTA4_FN),
+	PINMUX_DATA(D19_MARK, PTA3_FN),
+	PINMUX_DATA(D18_MARK, PTA2_FN),
+	PINMUX_DATA(D17_MARK, PTA1_FN),
+	PINMUX_DATA(D16_MARK, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK, PTB7_FN),
+	PINMUX_DATA(D30_MARK, PTB6_FN),
+	PINMUX_DATA(D29_MARK, PTB5_FN),
+	PINMUX_DATA(D28_MARK, PTB4_FN),
+	PINMUX_DATA(D27_MARK, PTB3_FN),
+	PINMUX_DATA(D26_MARK, PTB2_FN),
+	PINMUX_DATA(D25_MARK, PTB1_FN),
+	PINMUX_DATA(D24_MARK, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(LCD_DATA7_MARK, PTC7_FN),
+	PINMUX_DATA(LCD_DATA6_MARK, PTC6_FN),
+	PINMUX_DATA(LCD_DATA5_MARK, PTC5_FN),
+	PINMUX_DATA(LCD_DATA4_MARK, PTC4_FN),
+	PINMUX_DATA(LCD_DATA3_MARK, PTC3_FN),
+	PINMUX_DATA(LCD_DATA2_MARK, PTC2_FN),
+	PINMUX_DATA(LCD_DATA1_MARK, PTC1_FN),
+	PINMUX_DATA(LCD_DATA0_MARK, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(LCD_DATA15_MARK, PTD7_FN),
+	PINMUX_DATA(LCD_DATA14_MARK, PTD6_FN),
+	PINMUX_DATA(LCD_DATA13_MARK, PTD5_FN),
+	PINMUX_DATA(LCD_DATA12_MARK, PTD4_FN),
+	PINMUX_DATA(LCD_DATA11_MARK, PTD3_FN),
+	PINMUX_DATA(LCD_DATA10_MARK, PTD2_FN),
+	PINMUX_DATA(LCD_DATA9_MARK, PTD1_FN),
+	PINMUX_DATA(LCD_DATA8_MARK, PTD0_FN),
+
+	/* PTE FN (PTE6 and PTE5 both key off the same PSELB_9_8 value) */
+	PINMUX_DATA(IIC_SCL_MARK, PSELB_9_8_00, PTE6_FN),
+	PINMUX_DATA(AFE_RXIN_MARK, PSELB_9_8_11, PTE6_FN),
+	PINMUX_DATA(IIC_SDA_MARK, PSELB_9_8_00, PTE5_FN),
+	PINMUX_DATA(AFE_RDET_MARK, PSELB_9_8_11, PTE5_FN),
+	PINMUX_DATA(LCD_M_DISP_MARK, PTE4_FN),
+	PINMUX_DATA(LCD_CL1_MARK, PTE3_FN),
+	PINMUX_DATA(LCD_CL2_MARK, PTE2_FN),
+	PINMUX_DATA(LCD_DON_MARK, PTE1_FN),
+	PINMUX_DATA(LCD_FLM_MARK, PTE0_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(DA1_MARK, PTF6_FN),
+	PINMUX_DATA(DA0_MARK, PTF5_FN),
+	PINMUX_DATA(AN3_MARK, PTF4_FN),
+	PINMUX_DATA(AN2_MARK, PTF3_FN),
+	PINMUX_DATA(AN1_MARK, PTF2_FN),
+	PINMUX_DATA(AN0_MARK, PTF1_FN),
+	PINMUX_DATA(ADTRG_MARK, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(USB1D_RCV_MARK, PSELA_3_2_00, PTG6_FN),
+	PINMUX_DATA(AFE_FS_MARK, PSELA_3_2_01, PTG6_FN),
+	PINMUX_DATA(PCC_REG_MARK, PSELA_3_2_10, PTG6_FN),
+	PINMUX_DATA(IRQ5_MARK, PSELA_3_2_11, PTG6_FN),
+	PINMUX_DATA(USB1D_TXSE0_MARK, PSELA_5_4_00, PTG5_FN),
+	PINMUX_DATA(AFE_TXOUT_MARK, PSELA_5_4_01, PTG5_FN),
+	PINMUX_DATA(PCC_DRV_MARK, PSELA_5_4_10, PTG5_FN),
+	PINMUX_DATA(IRQ4_MARK, PSELA_5_4_11, PTG5_FN),
+	PINMUX_DATA(USB1D_TXDPLS_MARK, PSELA_7_6_00, PTG4_FN),
+	PINMUX_DATA(AFE_SCLK_MARK, PSELA_7_6_01, PTG4_FN),
+	PINMUX_DATA(IOIS16_MARK, PSELA_7_6_10, PTG4_FN),
+	PINMUX_DATA(USB1D_DMNS_MARK, PSELA_9_8_00, PTG3_FN),
+	PINMUX_DATA(AFE_RLYCNT_MARK, PSELA_9_8_01, PTG3_FN),
+	PINMUX_DATA(PCC_BVD2_MARK, PSELA_9_8_10, PTG3_FN),
+	PINMUX_DATA(USB1D_DPLS_MARK, PSELA_11_10_00, PTG2_FN),
+	PINMUX_DATA(AFE_HC1_MARK, PSELA_11_10_01, PTG2_FN),
+	PINMUX_DATA(PCC_BVD1_MARK, PSELA_11_10_10, PTG2_FN),
+	PINMUX_DATA(USB1D_SPEED_MARK, PSELA_13_12_00, PTG1_FN),
+	PINMUX_DATA(PCC_CD2_MARK, PSELA_13_12_10, PTG1_FN),
+	PINMUX_DATA(USB1D_TXENL_MARK, PSELA_15_14_00, PTG0_FN),
+	PINMUX_DATA(PCC_CD1_MARK, PSELA_15_14_10, PTG0_FN),
+
+	/* PTH FN */
+	PINMUX_DATA(RAS_MARK, PTH6_FN),
+	PINMUX_DATA(CAS_MARK, PTH5_FN),
+	PINMUX_DATA(CKE_MARK, PTH4_FN),
+	PINMUX_DATA(STATUS1_MARK, PTH3_FN),
+	PINMUX_DATA(STATUS0_MARK, PTH2_FN),
+	PINMUX_DATA(USB2_PWR_EN_MARK, PTH1_FN),
+	PINMUX_DATA(USB1_PWR_EN_USBF_UPLUP_MARK, PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(AUDCK_MARK, PTJ6_FN),
+	PINMUX_DATA(ASEBRKAK_MARK, PTJ5_FN),
+	PINMUX_DATA(AUDATA3_MARK, PTJ4_FN),
+	PINMUX_DATA(AUDATA2_MARK, PTJ3_FN),
+	PINMUX_DATA(AUDATA1_MARK, PTJ2_FN),
+	PINMUX_DATA(AUDATA0_MARK, PTJ1_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(PCC_RESET_MARK, PTK3_FN),
+	PINMUX_DATA(PCC_RDY_MARK, PTK2_FN),
+	PINMUX_DATA(PCC_VS2_MARK, PTK1_FN),
+	PINMUX_DATA(PCC_VS1_MARK, PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(TRST_MARK, PTL7_FN),
+	PINMUX_DATA(TMS_MARK, PTL6_FN),
+	PINMUX_DATA(TDO_MARK, PTL5_FN),
+	PINMUX_DATA(TDI_MARK, PTL4_FN),
+	PINMUX_DATA(TCK_MARK, PTL3_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(DREQ1_MARK, PTM7_FN),
+	PINMUX_DATA(DREQ0_MARK, PTM6_FN),
+	PINMUX_DATA(DACK1_MARK, PTM5_FN),
+	PINMUX_DATA(DACK0_MARK, PTM4_FN),
+	PINMUX_DATA(TEND1_MARK, PTM3_FN),
+	PINMUX_DATA(TEND0_MARK, PTM2_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK, PTM1_FN),
+	PINMUX_DATA(CS6B_CE1B_MARK, PTM0_FN),
+
+	/* PTP FN */
+	PINMUX_DATA(USB1D_SUSPEND_MARK, PSELA_1_0_00, PTP4_FN),
+	PINMUX_DATA(REFOUT_MARK, PSELA_1_0_01, PTP4_FN),
+	PINMUX_DATA(IRQOUT_MARK, PSELA_1_0_10, PTP4_FN),
+	PINMUX_DATA(IRQ3_IRL3_MARK, PTP3_FN),
+	PINMUX_DATA(IRQ2_IRL2_MARK, PTP2_FN),
+	PINMUX_DATA(IRQ1_IRL1_MARK, PTP1_FN),
+	PINMUX_DATA(IRQ0_IRL0_MARK, PTP0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(A25_MARK, PTR7_FN),
+	PINMUX_DATA(A24_MARK, PTR6_FN),
+	PINMUX_DATA(A23_MARK, PTR5_FN),
+	PINMUX_DATA(A22_MARK, PTR4_FN),
+	PINMUX_DATA(A21_MARK, PTR3_FN),
+	PINMUX_DATA(A20_MARK, PTR2_FN),
+	PINMUX_DATA(A19_MARK, PTR1_FN),
+	PINMUX_DATA(A0_MARK, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(SIOF0_SYNC_MARK, PTS4_FN),
+	PINMUX_DATA(SIOF0_MCLK_MARK, PTS3_FN),
+	PINMUX_DATA(SIOF0_TXD_MARK, PTS2_FN),
+	PINMUX_DATA(SIOF0_RXD_MARK, PTS1_FN),
+	PINMUX_DATA(SIOF0_SCK_MARK, PTS0_FN),
+
+	/* PTT FN (PTT4 and PTT3 both key off the same PSELB_15_14 value) */
+	PINMUX_DATA(SCIF0_CTS_MARK, PSELB_15_14_00, PTT4_FN),
+	PINMUX_DATA(TPU_TO1_MARK, PSELB_15_14_11, PTT4_FN),
+	PINMUX_DATA(SCIF0_RTS_MARK, PSELB_15_14_00, PTT3_FN),
+	PINMUX_DATA(TPU_TO0_MARK, PSELB_15_14_11, PTT3_FN),
+	PINMUX_DATA(SCIF0_TXD_MARK, PTT2_FN),
+	PINMUX_DATA(SCIF0_RXD_MARK, PTT1_FN),
+	PINMUX_DATA(SCIF0_SCK_MARK, PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(SIOF1_SYNC_MARK, PTU4_FN),
+	PINMUX_DATA(SIOF1_MCLK_MARK, PSELD_11_10_00, PTU3_FN),
+	PINMUX_DATA(TPU_TI3B_MARK, PSELD_11_10_01, PTU3_FN),
+	PINMUX_DATA(SIOF1_TXD_MARK, PSELD_15_14_00, PTU2_FN),
+	PINMUX_DATA(TPU_TI3A_MARK, PSELD_15_14_01, PTU2_FN),
+	PINMUX_DATA(MMC_DAT_MARK, PSELD_15_14_10, PTU2_FN),
+	PINMUX_DATA(SIOF1_RXD_MARK, PSELC_13_12_00, PTU1_FN),
+	PINMUX_DATA(TPU_TI2B_MARK, PSELC_13_12_01, PTU1_FN),
+	PINMUX_DATA(MMC_CMD_MARK, PSELC_13_12_10, PTU1_FN),
+	PINMUX_DATA(SIOF1_SCK_MARK, PSELC_15_14_00, PTU0_FN),
+	PINMUX_DATA(TPU_TI2A_MARK, PSELC_15_14_01, PTU0_FN),
+	PINMUX_DATA(MMC_CLK_MARK, PSELC_15_14_10, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(SCIF1_CTS_MARK, PSELB_11_10_00, PTV4_FN),
+	PINMUX_DATA(TPU_TO3_MARK, PSELB_11_10_01, PTV4_FN),
+	PINMUX_DATA(MMC_VDDON_MARK, PSELB_11_10_10, PTV4_FN),
+	PINMUX_DATA(LCD_VEPWC_MARK, PSELB_11_10_11, PTV4_FN),
+	PINMUX_DATA(SCIF1_RTS_MARK, PSELB_13_12_00, PTV3_FN),
+	PINMUX_DATA(TPU_TO2_MARK, PSELB_13_12_01, PTV3_FN),
+	PINMUX_DATA(MMC_ODMOD_MARK, PSELB_13_12_10, PTV3_FN),
+	PINMUX_DATA(LCD_VCPWC_MARK, PSELB_13_12_11, PTV3_FN),
+	PINMUX_DATA(SCIF1_TXD_MARK, PSELC_9_8_00, PTV2_FN),
+	PINMUX_DATA(SIM_D_MARK, PSELC_9_8_10, PTV2_FN),
+	PINMUX_DATA(SCIF1_RXD_MARK, PSELC_11_10_00, PTV1_FN),
+	PINMUX_DATA(SIM_RST_MARK, PSELC_11_10_10, PTV1_FN),
+	PINMUX_DATA(SCIF1_SCK_MARK, PSELD_1_0_00, PTV0_FN),
+	PINMUX_DATA(SIM_CLK_MARK, PSELD_1_0_10, PTV0_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA: GPIO number paired with its port data ID (same through PTV) */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG6, PTG6_DATA),
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ4, PTJ4_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTP */
+	PINMUX_GPIO(GPIO_PTP4, PTP4_DATA),
+	PINMUX_GPIO(GPIO_PTP3, PTP3_DATA),
+	PINMUX_GPIO(GPIO_PTP2, PTP2_DATA),
+	PINMUX_GPIO(GPIO_PTP1, PTP1_DATA),
+	PINMUX_GPIO(GPIO_PTP0, PTP0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV: last of the port pins; entries after it use *_MARK IDs */
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_RAS, RAS_MARK),
+	PINMUX_GPIO(GPIO_FN_CAS, CAS_MARK),
+	PINMUX_GPIO(GPIO_FN_CKE, CKE_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A, CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_A21, A21_MARK),
+	PINMUX_GPIO(GPIO_FN_A20, A20_MARK),
+	PINMUX_GPIO(GPIO_FN_A19, A19_MARK),
+	PINMUX_GPIO(GPIO_FN_A0, A0_MARK),
+	PINMUX_GPIO(GPIO_FN_REFOUT, REFOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQOUT, IRQOUT_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCD_DATA15, LCD_DATA15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA14, LCD_DATA14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA13, LCD_DATA13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA12, LCD_DATA12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA11, LCD_DATA11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA10, LCD_DATA10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA9, LCD_DATA9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA8, LCD_DATA8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA7, LCD_DATA7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA6, LCD_DATA6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA5, LCD_DATA5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA4, LCD_DATA4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA3, LCD_DATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA2, LCD_DATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA1, LCD_DATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DATA0, LCD_DATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_M_DISP, LCD_M_DISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL1, LCD_CL1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_CL2, LCD_CL2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_DON, LCD_DON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_FLM, LCD_FLM_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VEPWC, LCD_VEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCD_VCPWC, LCD_VCPWC_MARK),
+
+	/* AFEIF */
+	PINMUX_GPIO(GPIO_FN_AFE_RXIN, AFE_RXIN_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_RDET, AFE_RDET_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_FS, AFE_FS_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_TXOUT, AFE_TXOUT_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_SCLK, AFE_SCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_RLYCNT, AFE_RLYCNT_MARK),
+	PINMUX_GPIO(GPIO_FN_AFE_HC1, AFE_HC1_MARK),
+
+	/* IIC */
+	PINMUX_GPIO(GPIO_FN_IIC_SCL, IIC_SCL_MARK),
+	PINMUX_GPIO(GPIO_FN_IIC_SDA, IIC_SDA_MARK),
+
+	/* DAC */
+	PINMUX_GPIO(GPIO_FN_DA1, DA1_MARK),
+	PINMUX_GPIO(GPIO_FN_DA0, DA0_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_AN3, AN3_MARK),
+	PINMUX_GPIO(GPIO_FN_AN2, AN2_MARK),
+	PINMUX_GPIO(GPIO_FN_AN1, AN1_MARK),
+	PINMUX_GPIO(GPIO_FN_AN0, AN0_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG, ADTRG_MARK),
+
+	/* USB */
+	PINMUX_GPIO(GPIO_FN_USB1D_RCV, USB1D_RCV_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXSE0, USB1D_TXSE0_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXDPLS, USB1D_TXDPLS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_DMNS, USB1D_DMNS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_DPLS, USB1D_DPLS_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_SPEED, USB1D_SPEED_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_TXENL, USB1D_TXENL_MARK),
+
+	PINMUX_GPIO(GPIO_FN_USB2_PWR_EN, USB2_PWR_EN_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1_PWR_EN_USBF_UPLUP,
+		    USB1_PWR_EN_USBF_UPLUP_MARK),
+	PINMUX_GPIO(GPIO_FN_USB1D_SUSPEND, USB1D_SUSPEND_MARK),
+
+	/* INTC */
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3_IRL3, IRQ3_IRL3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2_IRL2, IRQ2_IRL2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1_IRL1, IRQ1_IRL1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ0_IRL0, IRQ0_IRL0_MARK),
+
+	/* PCC */
+	PINMUX_GPIO(GPIO_FN_PCC_REG, PCC_REG_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_DRV, PCC_DRV_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_BVD2, PCC_BVD2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_BVD1, PCC_BVD1_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_CD2, PCC_CD2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_CD1, PCC_CD1_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_RESET, PCC_RESET_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_RDY, PCC_RDY_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_VS2, PCC_VS2_MARK),
+	PINMUX_GPIO(GPIO_FN_PCC_VS1, PCC_VS1_MARK),
+
+	/* HUDI */
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDCK, AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_ASEBRKAK, ASEBRKAK_MARK),
+	PINMUX_GPIO(GPIO_FN_TRST, TRST_MARK),
+	PINMUX_GPIO(GPIO_FN_TMS, TMS_MARK),
+	PINMUX_GPIO(GPIO_FN_TDO, TDO_MARK),
+	PINMUX_GPIO(GPIO_FN_TDI, TDI_MARK),
+	PINMUX_GPIO(GPIO_FN_TCK, TCK_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK1, DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1, DREQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0, DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND1, TEND1_MARK),
+	PINMUX_GPIO(GPIO_FN_TEND0, TEND0_MARK),
+
+	/* SIOF0 */
+	PINMUX_GPIO(GPIO_FN_SIOF0_SYNC, SIOF0_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_MCLK, SIOF0_MCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_TXD, SIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_RXD, SIOF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SCK, SIOF0_SCK_MARK),
+
+	/* SIOF1 */
+	PINMUX_GPIO(GPIO_FN_SIOF1_SYNC, SIOF1_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_MCLK, SIOF1_MCLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_TXD, SIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_RXD, SIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SCK, SIOF1_SCK_MARK),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD, SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD, SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RTS, SCIF0_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_CTS, SCIF0_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK, SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD, SCIF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD, SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RTS, SCIF1_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_CTS, SCIF1_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK, SCIF1_SCK_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPU_TO1, TPU_TO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO0, TPU_TO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI3B, TPU_TI3B_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI3A, TPU_TI3A_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI2B, TPU_TI2B_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TI2A, TPU_TI2A_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO3, TPU_TO3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPU_TO2, TPU_TO2_MARK),
+
+	/* SIM */
+	PINMUX_GPIO(GPIO_FN_SIM_D, SIM_D_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_CLK, SIM_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_RST, SIM_RST_MARK),
+
+	/* MMC */
+	PINMUX_GPIO(GPIO_FN_MMC_DAT, MMC_DAT_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CMD, MMC_CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CLK, MMC_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_VDDON, MMC_VDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_ODMOD, MMC_ODMOD_MARK),
+
+	/* SYSC */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS1, STATUS1_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, PTA7_IN_PU, PTA7_IN, /* row = one pin */
+		PTA6_FN, PTA6_OUT, PTA6_IN_PU, PTA6_IN,
+		PTA5_FN, PTA5_OUT, PTA5_IN_PU, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},	/* presumably 16-bit reg, one 2-bit field per row */
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, PTB7_IN_PU, PTB7_IN,
+		PTB6_FN, PTB6_OUT, PTB6_IN_PU, PTB6_IN,
+		PTB5_FN, PTB5_OUT, PTB5_IN_PU, PTB5_IN,
+		PTB4_FN, PTB4_OUT, PTB4_IN_PU, PTB4_IN,
+		PTB3_FN, PTB3_OUT, PTB3_IN_PU, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, PTB0_IN_PU, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, PTC7_IN_PU, PTC7_IN,
+		PTC6_FN, PTC6_OUT, PTC6_IN_PU, PTC6_IN,
+		PTC5_FN, PTC5_OUT, PTC5_IN_PU, PTC5_IN,
+		PTC4_FN, PTC4_OUT, PTC4_IN_PU, PTC4_IN,
+		PTC3_FN, PTC3_OUT, PTC3_IN_PU, PTC3_IN,
+		PTC2_FN, PTC2_OUT, PTC2_IN_PU, PTC2_IN,
+		PTC1_FN, PTC1_OUT, PTC1_IN_PU, PTC1_IN,
+		PTC0_FN, PTC0_OUT, PTC0_IN_PU, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, PTD7_IN_PU, PTD7_IN,
+		PTD6_FN, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		PTD5_FN, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		PTD4_FN, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		PTD3_FN, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		PTD2_FN, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		PTD1_FN, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		PTD0_FN, PTD0_OUT, PTD0_IN_PU, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		0, 0, 0, 0,	/* no PTE7 pin: row left empty */
+		PTE6_FN, 0, 0, PTE6_IN,
+		PTE5_FN, 0, 0, PTE5_IN,
+		PTE4_FN, PTE4_OUT, PTE4_IN_PU, PTE4_IN,
+		PTE3_FN, PTE3_OUT, PTE3_IN_PU, PTE3_IN,
+		PTE2_FN, PTE2_OUT, PTE2_IN_PU, PTE2_IN,
+		PTE1_FN, PTE1_OUT, PTE1_IN_PU, PTE1_IN,
+		PTE0_FN, PTE0_OUT, PTE0_IN_PU, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		0, 0, 0, 0,
+		PTF6_FN, 0, 0, PTF6_IN,
+		PTF5_FN, 0, 0, PTF5_IN,
+		PTF4_FN, 0, 0, PTF4_IN,
+		PTF3_FN, 0, 0, PTF3_IN,
+		PTF2_FN, 0, 0, PTF2_IN,
+		PTF1_FN, 0, 0, PTF1_IN,
+		PTF0_FN, 0, 0, PTF0_IN }
+	},	/* NOTE(review): pinmux_data also lists PTF0_OUT/PTF0_IN_PU */
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		PTG6_FN, PTG6_OUT, PTG6_IN_PU, PTG6_IN,
+		PTG5_FN, PTG5_OUT, PTG5_IN_PU, PTG5_IN,
+		PTG4_FN, PTG4_OUT, PTG4_IN_PU, PTG4_IN,
+		PTG3_FN, PTG3_OUT, PTG3_IN_PU, PTG3_IN,
+		PTG2_FN, PTG2_OUT, PTG2_IN_PU, PTG2_IN,
+		PTG1_FN, PTG1_OUT, PTG1_IN_PU, PTG1_IN,
+		PTG0_FN, PTG0_OUT, PTG0_IN_PU, PTG0_IN }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		0, 0, 0, 0,
+		PTH6_FN, PTH6_OUT, PTH6_IN_PU, PTH6_IN,
+		PTH5_FN, PTH5_OUT, PTH5_IN_PU, PTH5_IN,
+		PTH4_FN, PTH4_OUT, PTH4_IN_PU, PTH4_IN,
+		PTH3_FN, PTH3_OUT, PTH3_IN_PU, PTH3_IN,
+		PTH2_FN, PTH2_OUT, PTH2_IN_PU, PTH2_IN,
+		PTH1_FN, PTH1_OUT, PTH1_IN_PU, PTH1_IN,
+		PTH0_FN, PTH0_OUT, PTH0_IN_PU, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		0, 0, 0, 0,
+		PTJ6_FN, PTJ6_OUT, PTJ6_IN_PU, PTJ6_IN,
+		PTJ5_FN, PTJ5_OUT, PTJ5_IN_PU, PTJ5_IN,
+		PTJ4_FN, PTJ4_OUT, PTJ4_IN_PU, PTJ4_IN,
+		PTJ3_FN, PTJ3_OUT, PTJ3_IN_PU, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, PTJ2_IN_PU, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTK3_FN, PTK3_OUT, PTK3_IN_PU, PTK3_IN,
+		PTK2_FN, PTK2_OUT, PTK2_IN_PU, PTK2_IN,
+		PTK1_FN, PTK1_OUT, PTK1_IN_PU, PTK1_IN,
+		PTK0_FN, PTK0_OUT, PTK0_IN_PU, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, PTL7_IN_PU, PTL7_IN,
+		PTL6_FN, PTL6_OUT, PTL6_IN_PU, PTL6_IN,
+		PTL5_FN, PTL5_OUT, PTL5_IN_PU, PTL5_IN,
+		PTL4_FN, PTL4_OUT, PTL4_IN_PU, PTL4_IN,
+		PTL3_FN, PTL3_OUT, PTL3_IN_PU, PTL3_IN,
+		0, 0, 0, 0,	/* the PTL2..PTL0 rows stay empty */
+		0, 0, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, PTM7_IN_PU, PTM7_IN,
+		PTM6_FN, PTM6_OUT, PTM6_IN_PU, PTM6_IN,
+		PTM5_FN, PTM5_OUT, PTM5_IN_PU, PTM5_IN,
+		PTM4_FN, PTM4_OUT, PTM4_IN_PU, PTM4_IN,
+		PTM3_FN, PTM3_OUT, PTM3_IN_PU, PTM3_IN,
+		PTM2_FN, PTM2_OUT, PTM2_IN_PU, PTM2_IN,
+		PTM1_FN, PTM1_OUT, PTM1_IN_PU, PTM1_IN,
+		PTM0_FN, PTM0_OUT, PTM0_IN_PU, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PPCR", 0xa4050118, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTP4_FN, PTP4_OUT, PTP4_IN_PU, PTP4_IN,
+		PTP3_FN, PTP3_OUT, PTP3_IN_PU, PTP3_IN,
+		PTP2_FN, PTP2_OUT, PTP2_IN_PU, PTP2_IN,
+		PTP1_FN, PTP1_OUT, PTP1_IN_PU, PTP1_IN,
+		PTP0_FN, PTP0_OUT, PTP0_IN_PU, PTP0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011a, 16, 2) {
+		PTR7_FN, PTR7_OUT, PTR7_IN_PU, PTR7_IN,
+		PTR6_FN, PTR6_OUT, PTR6_IN_PU, PTR6_IN,
+		PTR5_FN, PTR5_OUT, PTR5_IN_PU, PTR5_IN,
+		PTR4_FN, PTR4_OUT, PTR4_IN_PU, PTR4_IN,
+		PTR3_FN, PTR3_OUT, PTR3_IN_PU, PTR3_IN,
+		PTR2_FN, PTR2_OUT, PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, PTR1_IN_PU, PTR1_IN,
+		PTR0_FN, PTR0_OUT, PTR0_IN_PU, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTS4_FN, PTS4_OUT, PTS4_IN_PU, PTS4_IN,
+		PTS3_FN, PTS3_OUT, PTS3_IN_PU, PTS3_IN,
+		PTS2_FN, PTS2_OUT, PTS2_IN_PU, PTS2_IN,
+		PTS1_FN, PTS1_OUT, PTS1_IN_PU, PTS1_IN,
+		PTS0_FN, PTS0_OUT, PTS0_IN_PU, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTT4_FN, PTT4_OUT, PTT4_IN_PU, PTT4_IN,
+		PTT3_FN, PTT3_OUT, PTT3_IN_PU, PTT3_IN,
+		PTT2_FN, PTT2_OUT, PTT2_IN_PU, PTT2_IN,
+		PTT1_FN, PTT1_OUT, PTT1_IN_PU, PTT1_IN,
+		PTT0_FN, PTT0_OUT, PTT0_IN_PU, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050120, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTU4_FN, PTU4_OUT, PTU4_IN_PU, PTU4_IN,
+		PTU3_FN, PTU3_OUT, PTU3_IN_PU, PTU3_IN,
+		PTU2_FN, PTU2_OUT, PTU2_IN_PU, PTU2_IN,
+		PTU1_FN, PTU1_OUT, PTU1_IN_PU, PTU1_IN,
+		PTU0_FN, PTU0_OUT, PTU0_IN_PU, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050122, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTV4_FN, PTV4_OUT, PTV4_IN_PU, PTV4_IN,
+		PTV3_FN, PTV3_OUT, PTV3_IN_PU, PTV3_IN,
+		PTV2_FN, PTV2_OUT, PTV2_IN_PU, PTV2_IN,
+		PTV1_FN, PTV1_OUT, PTV1_IN_PU, PTV1_IN,
+		PTV0_FN, PTV0_OUT, PTV0_IN_PU, PTV0_IN }
+	},
+	{}	/* empty terminating entry */
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050140, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},	/* 8 IDs per register, PTx7 first; 0 where no pin is listed */
+	{ PINMUX_DATA_REG("PBDR", 0xa4050142, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050144, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050146, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},	/* data regs step by 2: PDDR lies between PCDR and PEDR */
+	{ PINMUX_DATA_REG("PEDR", 0xa4050148, 8) {
+		0, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405014a, 8) {
+		0, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405014c, 8) {
+		0, PTG6_DATA, PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405014e, 8) {
+		0, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050150, 8) {
+		0, PTJ6_DATA, PTJ5_DATA, PTJ4_DATA,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050152, 8) {
+		0, 0, 0, 0,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050154, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, 0, 0, 0 }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050156, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PPDR", 0xa4050158, 8) {
+		0, 0, 0, PTP4_DATA,
+		PTP3_DATA, PTP2_DATA, PTP1_DATA, PTP0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405015a, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405015c, 8) {
+		0, 0, 0, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa405015e, 8) {
+		0, 0, 0, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050160, 8) {
+		0, 0, 0, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050162, 8) {
+		0, 0, 0, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ },	/* empty terminating entry */
+};
+
+static struct pinmux_info sh7720_pinmux_info = {
+	.name = "sh7720_pfc",
+	/* GPIO numbers served, first to last, and their lookup table */
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_STATUS1,
+	.gpios = pinmux_gpios,
+	/* BEGIN/END enum bounds for each class of pinmux ID */
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+	/* register tables, then the per-ID table and its length */
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+/* Initcall: pass the SH7720 pin tables above to register_pinmux(). */
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7720_pinmux_info);
+}
+arch_initcall(plat_pinmux_setup);
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index 2d452f67fb8..2780917c008 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -36,7 +36,7 @@ extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
 extern unsigned long long float64_sub(unsigned long long a,
 				      unsigned long long b);
 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
-
+extern unsigned long int float64_to_float32(unsigned long long a);
 static unsigned int fpu_exception_flags;
 
 /*
@@ -417,6 +417,29 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 		regs->pc = nextpc;
 		return 1;
+	} else if ((finsn & 0xf1ff) == 0xf0bd) {
+		/* fcnvds DRm,FPUL (1111mmm0 10111101): double -> single */
+		struct task_struct *tsk = current;
+		int m;
+		unsigned int hx;
+		/* DRm lives in FRm:FRm+1, so keep the even FR index (bit 8 is 0) */
+		m = (finsn >> 8) & 0xe;
+		hx = tsk->thread.fpu.hard.fp_regs[m];
+
+		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
+			&& ((hx & 0x7fffffff) < 0x00100000)) {
+			/* subnormal double: build the 64-bit image unsigned */
+			unsigned long long llx;
+
+			llx = ((unsigned long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
+			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+
+			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
+		} else
+			return 0;
+
+		regs->pc = nextpc;
+		return 1;
 	}
 
 	return 0;
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 254c5c55ab9..d9bdc931ac0 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -178,10 +179,14 @@ static int __init sh7760_devices_setup(void)
 }
 __initcall(sh7760_devices_setup);
 
+#define INTC_ICR	0xffd00000UL	/* address used with ctrl_inw()/ctrl_outw() */
+#define INTC_ICR_IRLM	(1 << 7)	/* bit set in INTC_ICR for IRQ_MODE_IRQ */
+
 void __init plat_irq_setup_pins(int mode)
 {
 	switch (mode) {
 	case IRQ_MODE_IRQ:
+		ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);
 		register_intc_controller(&intc_desc_irq);
 		break;
 	default:
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c
index 828cb57cb95..2b747f3b02b 100644
--- a/arch/sh/kernel/cpu/sh4/softfloat.c
+++ b/arch/sh/kernel/cpu/sh4/softfloat.c
@@ -85,6 +85,7 @@ float64 float64_div(float64 a, float64 b);
 float32 float32_div(float32 a, float32 b);
 float32 float32_mul(float32 a, float32 b);
 float64 float64_mul(float64 a, float64 b);
+float32 float64_to_float32(float64 a);
 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 		   bits64 * z1Ptr);
 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
@@ -890,3 +891,31 @@ float64 float64_mul(float64 a, float64 b)
 	}
 	return roundAndPackFloat64(zSign, zExp, zSig0);
 }
+
+/*
+ * Returns the result of converting the double-precision floating-point value
+ * `a' to the single-precision floating-point format.  Rounding and packing
+ * are delegated to roundAndPackFloat32(); the conversion is meant to follow
+ * the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+ * NOTE(review): no aExp == 0x7FF branch exists here, so NaN/infinity inputs
+ * take the generic path below - confirm callers only pass finite values.
+ */
+float32 float64_to_float32(float64 a)
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    bits32 zSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    /* Scale the fraction down by 22 bits before narrowing it to bits32. */
+    shift64RightJamming( aSig, 22, &aSig );
+    zSig = aSig;
+    if ( aExp || zSig ) {	/* skipped only when both are zero */
+        zSig |= 0x40000000;	/* set bit 30 */
+        aExp -= 0x381;	/* 0x381 == (0x3FF - 0x7F) + 1; presumably rebias - TODO confirm */
+    }
+    return roundAndPackFloat32(aSign, aExp, zSig);
+}
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 9381ad8da26..be9a0c18595 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -27,5 +27,10 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
+# Pinmux setup (objects are added via obj-$(CONFIG_GENERIC_GPIO) below)
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7722)	:= pinmux-sh7722.o
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7723)	:= pinmux-sh7723.o
+
 obj-y			+= $(clock-y)
 obj-$(CONFIG_SMP)	+= $(smp-y)
+obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
new file mode 100644
index 00000000000..cb9d07bd59f
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7722.c
@@ -0,0 +1,1783 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <cpu/sh7722.h>
+
+enum {	/* one flat id space; each BEGIN/END pair brackets a group */
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,	/* *_DATA ids follow */
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC5_DATA, PTC4_DATA, PTC3_DATA, PTC2_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA, PTE1_DATA, PTE0_DATA,
+	PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG4_DATA, PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA,
+	PTR4_DATA, PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS4_DATA, PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV4_DATA, PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ5_DATA, PTZ4_DATA, PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,	/* *_IN ids follow */
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC5_IN, PTC4_IN, PTC3_IN, PTC2_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN, PTD3_IN, PTD2_IN, PTD1_IN,
+	PTE7_IN, PTE6_IN, PTE5_IN, PTE4_IN, PTE1_IN, PTE0_IN,
+	PTF6_IN, PTF5_IN, PTF4_IN, PTF3_IN, PTF2_IN, PTF1_IN,
+	PTH6_IN, PTH5_IN, PTH1_IN, PTH0_IN,
+	PTJ1_IN, PTJ0_IN,
+	PTK6_IN, PTK5_IN, PTK4_IN, PTK3_IN, PTK2_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ5_IN, PTQ4_IN, PTQ3_IN, PTQ2_IN, PTQ0_IN,
+	PTR2_IN,
+	PTS4_IN, PTS2_IN, PTS1_IN,
+	PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN,
+	PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV4_IN, PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW6_IN, PTW4_IN, PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX6_IN, PTX5_IN, PTX4_IN, PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY5_IN, PTY4_IN, PTY3_IN, PTY2_IN, PTY0_IN,
+	PTZ5_IN, PTZ4_IN, PTZ3_IN, PTZ2_IN, PTZ1_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLDOWN_BEGIN,	/* *_IN_PD ids follow */
+	PTA7_IN_PD, PTA6_IN_PD, PTA5_IN_PD, PTA4_IN_PD,
+	PTA3_IN_PD, PTA2_IN_PD, PTA1_IN_PD, PTA0_IN_PD,
+	PTE7_IN_PD, PTE6_IN_PD, PTE5_IN_PD, PTE4_IN_PD,	PTE1_IN_PD, PTE0_IN_PD,
+	PTF6_IN_PD, PTF5_IN_PD, PTF4_IN_PD, PTF3_IN_PD, PTF2_IN_PD, PTF1_IN_PD,
+	PTH6_IN_PD, PTH5_IN_PD, PTH1_IN_PD, PTH0_IN_PD,
+	PTK6_IN_PD, PTK5_IN_PD, PTK4_IN_PD, PTK3_IN_PD, PTK2_IN_PD, PTK0_IN_PD,
+	PTL7_IN_PD, PTL6_IN_PD, PTL5_IN_PD, PTL4_IN_PD,
+	PTL3_IN_PD, PTL2_IN_PD, PTL1_IN_PD, PTL0_IN_PD,
+	PTM7_IN_PD, PTM6_IN_PD, PTM5_IN_PD, PTM4_IN_PD,
+	PTM3_IN_PD, PTM2_IN_PD, PTM1_IN_PD, PTM0_IN_PD,
+	PTQ5_IN_PD, PTQ4_IN_PD, PTQ3_IN_PD, PTQ2_IN_PD,
+	PTS4_IN_PD, PTS2_IN_PD, PTS1_IN_PD,
+	PTT4_IN_PD, PTT3_IN_PD, PTT2_IN_PD, PTT1_IN_PD,
+	PTU4_IN_PD, PTU3_IN_PD, PTU2_IN_PD, PTU1_IN_PD, PTU0_IN_PD,
+	PTV4_IN_PD, PTV3_IN_PD, PTV2_IN_PD, PTV1_IN_PD, PTV0_IN_PD,
+	PTW6_IN_PD, PTW4_IN_PD,	PTW3_IN_PD, PTW2_IN_PD, PTW1_IN_PD, PTW0_IN_PD,
+	PTX6_IN_PD, PTX5_IN_PD, PTX4_IN_PD,
+	PTX3_IN_PD, PTX2_IN_PD, PTX1_IN_PD, PTX0_IN_PD,
+	PINMUX_INPUT_PULLDOWN_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,	/* *_IN_PU ids follow */
+	PTC7_IN_PU, PTC5_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU,
+	PTJ1_IN_PU, PTJ0_IN_PU,
+	PTQ0_IN_PU,
+	PTR2_IN_PU,
+	PTX6_IN_PU,
+	PTY5_IN_PU, PTY4_IN_PU, PTY3_IN_PU, PTY2_IN_PU, PTY0_IN_PU,
+	PTZ5_IN_PU, PTZ4_IN_PU, PTZ3_IN_PU, PTZ2_IN_PU, PTZ1_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,	/* *_OUT ids follow */
+	PTA7_OUT, PTA5_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC4_OUT, PTC3_OUT, PTC2_OUT, PTC0_OUT,
+	PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE7_OUT, PTE6_OUT, PTE5_OUT, PTE4_OUT, PTE1_OUT, PTE0_OUT,
+	PTF6_OUT, PTF5_OUT, PTF4_OUT, PTF3_OUT, PTF2_OUT, PTF0_OUT,
+	PTG4_OUT, PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ6_OUT, PTJ5_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK6_OUT, PTK5_OUT, PTK4_OUT, PTK3_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,	PTQ6_OUT, PTQ5_OUT, PTQ4_OUT,
+	PTQ3_OUT, PTQ2_OUT, PTQ1_OUT, PTQ0_OUT,
+	PTR4_OUT, PTR3_OUT, PTR1_OUT, PTR0_OUT,
+	PTS3_OUT, PTS2_OUT, PTS0_OUT,
+	PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT0_OUT,
+	PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU0_OUT,
+	PTV4_OUT, PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW5_OUT, PTW4_OUT, PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX6_OUT, PTX5_OUT, PTX4_OUT, PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY5_OUT, PTY4_OUT, PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_MARK_BEGIN,	/* *_MARK ids follow */
+	SCIF0_TXD_MARK, SCIF0_RXD_MARK,
+	SCIF0_RTS_MARK, SCIF0_CTS_MARK, SCIF0_SCK_MARK,
+	SCIF1_TXD_MARK, SCIF1_RXD_MARK,
+	SCIF1_RTS_MARK, SCIF1_CTS_MARK, SCIF1_SCK_MARK,
+	SCIF2_TXD_MARK, SCIF2_RXD_MARK,
+	SCIF2_RTS_MARK, SCIF2_CTS_MARK, SCIF2_SCK_MARK,
+	SIOTXD_MARK, SIORXD_MARK,
+	SIOD_MARK, SIOSTRB0_MARK, SIOSTRB1_MARK,
+	SIOSCK_MARK, SIOMCK_MARK,
+	VIO_D15_MARK, VIO_D14_MARK, VIO_D13_MARK, VIO_D12_MARK,
+	VIO_D11_MARK, VIO_D10_MARK, VIO_D9_MARK, VIO_D8_MARK,
+	VIO_D7_MARK, VIO_D6_MARK, VIO_D5_MARK, VIO_D4_MARK,
+	VIO_D3_MARK, VIO_D2_MARK, VIO_D1_MARK, VIO_D0_MARK,
+	VIO_CLK_MARK, VIO_VD_MARK, VIO_HD_MARK, VIO_FLD_MARK,
+	VIO_CKO_MARK, VIO_STEX_MARK, VIO_STEM_MARK, VIO_VD2_MARK,
+	VIO_HD2_MARK, VIO_CLK2_MARK,
+	LCDD23_MARK, LCDD22_MARK, LCDD21_MARK, LCDD20_MARK,
+	LCDD19_MARK, LCDD18_MARK, LCDD17_MARK, LCDD16_MARK,
+	LCDD15_MARK, LCDD14_MARK, LCDD13_MARK, LCDD12_MARK,
+	LCDD11_MARK, LCDD10_MARK, LCDD9_MARK, LCDD8_MARK,
+	LCDD7_MARK, LCDD6_MARK, LCDD5_MARK, LCDD4_MARK,
+	LCDD3_MARK, LCDD2_MARK, LCDD1_MARK, LCDD0_MARK,
+	LCDLCLK_MARK, LCDDON_MARK, LCDVCPWC_MARK, LCDVEPWC_MARK,
+	LCDVSYN_MARK, LCDDCK_MARK, LCDHSYN_MARK, LCDDISP_MARK,
+	LCDRS_MARK, LCDCS_MARK, LCDWR_MARK, LCDRD_MARK,
+	LCDDON2_MARK, LCDVCPWC2_MARK, LCDVEPWC2_MARK, LCDVSYN2_MARK,
+	LCDCS2_MARK,
+	IOIS16_MARK, A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	BS_MARK, CS6B_CE1B_MARK, WAIT_MARK, CS6A_CE2B_MARK,
+	HPD63_MARK, HPD62_MARK, HPD61_MARK, HPD60_MARK,
+	HPD59_MARK, HPD58_MARK, HPD57_MARK, HPD56_MARK,
+	HPD55_MARK, HPD54_MARK, HPD53_MARK, HPD52_MARK,
+	HPD51_MARK, HPD50_MARK, HPD49_MARK, HPD48_MARK,
+	HPDQM7_MARK, HPDQM6_MARK, HPDQM5_MARK, HPDQM4_MARK,
+	IRQ0_MARK, IRQ1_MARK, IRQ2_MARK, IRQ3_MARK,
+	IRQ4_MARK, IRQ5_MARK, IRQ6_MARK, IRQ7_MARK,
+	SDHICD_MARK, SDHIWP_MARK, SDHID3_MARK, SDHID2_MARK,
+	SDHID1_MARK, SDHID0_MARK, SDHICMD_MARK, SDHICLK_MARK,
+	SIUAOLR_MARK, SIUAOBT_MARK, SIUAISLD_MARK, SIUAILR_MARK,
+	SIUAIBT_MARK, SIUAOSLD_MARK, SIUMCKA_MARK, SIUFCKA_MARK,
+	SIUBOLR_MARK, SIUBOBT_MARK, SIUBISLD_MARK, SIUBILR_MARK,
+	SIUBIBT_MARK, SIUBOSLD_MARK, SIUMCKB_MARK, SIUFCKB_MARK,
+	AUDSYNC_MARK, AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK,	AUDATA0_MARK,
+	DACK_MARK, DREQ0_MARK,
+	DV_CLKI_MARK, DV_CLK_MARK, DV_HSYNC_MARK, DV_VSYNC_MARK,
+	DV_D15_MARK, DV_D14_MARK, DV_D13_MARK, DV_D12_MARK,
+	DV_D11_MARK, DV_D10_MARK, DV_D9_MARK, DV_D8_MARK,
+	DV_D7_MARK, DV_D6_MARK, DV_D5_MARK, DV_D4_MARK,
+	DV_D3_MARK, DV_D2_MARK, DV_D1_MARK, DV_D0_MARK,
+	STATUS0_MARK, PDSTATUS_MARK,
+	SIOF0_MCK_MARK, SIOF0_SCK_MARK,
+	SIOF0_SYNC_MARK, SIOF0_SS1_MARK, SIOF0_SS2_MARK,
+	SIOF0_TXD_MARK,	SIOF0_RXD_MARK,
+	SIOF1_MCK_MARK, SIOF1_SCK_MARK,
+	SIOF1_SYNC_MARK, SIOF1_SS1_MARK, SIOF1_SS2_MARK,
+	SIOF1_TXD_MARK, SIOF1_RXD_MARK,
+	SIM_D_MARK, SIM_CLK_MARK, SIM_RST_MARK,
+	TS_SDAT_MARK, TS_SCK_MARK, TS_SDEN_MARK, TS_SPSYNC_MARK,
+	IRDA_IN_MARK, IRDA_OUT_MARK,
+	TPUTO_MARK,
+	FCE_MARK, NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK, FCDE_MARK,
+	FOE_MARK, FSC_MARK, FWE_MARK, FRB_MARK,
+	KEYIN0_MARK, KEYIN1_MARK, KEYIN2_MARK, KEYIN3_MARK, KEYIN4_MARK,
+	KEYOUT0_MARK, KEYOUT1_MARK, KEYOUT2_MARK, KEYOUT3_MARK,
+	KEYOUT4_IN6_MARK, KEYOUT5_IN5_MARK,
+	PINMUX_MARK_END,
+
+	PINMUX_FUNCTION_BEGIN,	/* function, PSx, HIZx and MSELx ids follow */
+	VIO_D7_SCIF1_SCK, VIO_D6_SCIF1_RXD, VIO_D5_SCIF1_TXD, VIO_D4,
+	VIO_D3, VIO_D2, VIO_D1, VIO_D0_LCDLCLK,
+	HPD55, HPD54, HPD53, HPD52, HPD51, HPD50, HPD49, HPD48,
+	IOIS16, HPDQM7, HPDQM6, HPDQM5, HPDQM4,
+	SDHICD, SDHIWP, SDHID3, IRQ2_SDHID2, SDHID1, SDHID0, SDHICMD, SDHICLK,
+	A25, A24, A23, A22, IRQ5, IRQ4_BS,
+	PTF6, SIOSCK_SIUBOBT, SIOSTRB1_SIUBOLR,
+	SIOSTRB0_SIUBIBT, SIOD_SIUBILR, SIORXD_SIUBISLD, SIOTXD_SIUBOSLD,
+	AUDSYNC, AUDATA3, AUDATA2, AUDATA1, AUDATA0,
+	LCDVCPWC_LCDVCPWC2, LCDVSYN2_DACK, LCDVSYN, LCDDISP_LCDRS,
+	LCDHSYN_LCDCS, LCDDON_LCDDON2, LCDD17_DV_HSYNC, LCDD16_DV_VSYNC,
+	STATUS0, PDSTATUS, IRQ1, IRQ0,
+	SIUAILR_SIOF1_SS2, SIUAIBT_SIOF1_SS1, SIUAOLR_SIOF1_SYNC,
+	SIUAOBT_SIOF1_SCK, SIUAISLD_SIOF1_RXD, SIUAOSLD_SIOF1_TXD, PTK0,
+	LCDD15_DV_D15, LCDD14_DV_D14, LCDD13_DV_D13, LCDD12_DV_D12,
+	LCDD11_DV_D11, LCDD10_DV_D10, LCDD9_DV_D9, LCDD8_DV_D8,
+	LCDD7_DV_D7, LCDD6_DV_D6, LCDD5_DV_D5, LCDD4_DV_D4,
+	LCDD3_DV_D3, LCDD2_DV_D2, LCDD1_DV_D1, LCDD0_DV_D0,
+	HPD63, HPD62, HPD61, HPD60, HPD59, HPD58, HPD57, HPD56,
+	SIOF0_SS2_SIM_RST, SIOF0_SS1_TS_SPSYNC, SIOF0_SYNC_TS_SDEN,
+	SIOF0_SCK_TS_SCK, PTQ2, PTQ1, PTQ0,
+	LCDRD, CS6B_CE1B_LCDCS2, WAIT, LCDDCK_LCDWR, LCDVEPWC_LCDVEPWC2,
+	SCIF0_CTS_SIUAISPD, SCIF0_RTS_SIUAOSPD,
+	SCIF0_SCK_TPUTO, SCIF0_RXD, SCIF0_TXD,
+	FOE_VIO_VD2, FWE, FSC, DREQ0, FCDE,
+	NAF2_VIO_D10, NAF1_VIO_D9, NAF0_VIO_D8,
+	FRB_VIO_CLK2, FCE_VIO_HD2,
+	NAF7_VIO_D15, NAF6_VIO_D14, NAF5_VIO_D13, NAF4_VIO_D12, NAF3_VIO_D11,
+	VIO_FLD_SCIF2_CTS, VIO_CKO_SCIF2_RTS, VIO_STEX_SCIF2_SCK,
+	VIO_STEM_SCIF2_TXD, VIO_HD_SCIF2_RXD,
+	VIO_VD_SCIF1_CTS, VIO_CLK_SCIF1_RTS,
+	CS6A_CE2B, LCDD23, LCDD22, LCDD21, LCDD20,
+	LCDD19_DV_CLKI, LCDD18_DV_CLK,
+	KEYOUT5_IN5, KEYOUT4_IN6, KEYOUT3, KEYOUT2, KEYOUT1, KEYOUT0,
+	KEYIN4_IRQ7, KEYIN3, KEYIN2, KEYIN1, KEYIN0_IRQ6,
+
+	PSA15_KEYIN0, PSA15_IRQ6, PSA14_KEYIN4, PSA14_IRQ7,
+	PSA9_IRQ4, PSA9_BS, PSA4_IRQ2, PSA4_SDHID2,
+	PSB15_SIOTXD, PSB15_SIUBOSLD, PSB14_SIORXD, PSB14_SIUBISLD,
+	PSB13_SIOD, PSB13_SIUBILR, PSB12_SIOSTRB0, PSB12_SIUBIBT,
+	PSB11_SIOSTRB1, PSB11_SIUBOLR, PSB10_SIOSCK, PSB10_SIUBOBT,
+	PSB9_SIOMCK, PSB9_SIUMCKB, PSB8_SIOF0_MCK, PSB8_IRQ3,
+	PSB7_SIOF0_TXD, PSB7_IRDA_OUT, PSB6_SIOF0_RXD, PSB6_IRDA_IN,
+	PSB5_SIOF0_SCK, PSB5_TS_SCK, PSB4_SIOF0_SYNC, PSB4_TS_SDEN,
+	PSB3_SIOF0_SS1, PSB3_TS_SPSYNC, PSB2_SIOF0_SS2, PSB2_SIM_RST,
+	PSB1_SIUMCKA, PSB1_SIOF1_MCK, PSB0_SIUAOSLD, PSB0_SIOF1_TXD,
+	PSC15_SIUAISLD, PSC15_SIOF1_RXD, PSC14_SIUAOBT, PSC14_SIOF1_SCK,
+	PSC13_SIUAOLR, PSC13_SIOF1_SYNC, PSC12_SIUAIBT, PSC12_SIOF1_SS1,
+	PSC11_SIUAILR, PSC11_SIOF1_SS2, PSC0_NAF, PSC0_VIO,
+	PSD13_VIO, PSD13_SCIF2, PSD12_VIO, PSD12_SCIF1,
+	PSD11_VIO, PSD11_SCIF1, PSD10_VIO_D0, PSD10_LCDLCLK,
+	PSD9_SIOMCK_SIUMCKB, PSD9_SIUFCKB, PSD8_SCIF0_SCK, PSD8_TPUTO,
+	PSD7_SCIF0_RTS, PSD7_SIUAOSPD, PSD6_SCIF0_CTS, PSD6_SIUAISPD,
+	PSD5_CS6B_CE1B, PSD5_LCDCS2,
+	PSD3_LCDVEPWC_LCDVCPWC, PSD3_LCDVEPWC2_LCDVCPWC2,
+	PSD2_LCDDON, PSD2_LCDDON2, PSD0_LCDD19_LCDD0, PSD0_DV,
+	PSE15_SIOF0_MCK_IRQ3, PSE15_SIM_D,
+	PSE14_SIOF0_TXD_IRDA_OUT, PSE14_SIM_CLK,
+	PSE13_SIOF0_RXD_IRDA_IN, PSE13_TS_SDAT, PSE12_LCDVSYN2, PSE12_DACK,
+	PSE11_SIUMCKA_SIOF1_MCK, PSE11_SIUFCKA,
+	PSE3_FLCTL, PSE3_VIO, PSE2_NAF2, PSE2_VIO_D10,
+	PSE1_NAF1, PSE1_VIO_D9, PSE0_NAF0, PSE0_VIO_D8,
+
+	HIZA14_KEYSC, HIZA14_HIZ,
+	HIZA10_NAF, HIZA10_HIZ,
+	HIZA9_VIO, HIZA9_HIZ,
+	HIZA8_LCDC, HIZA8_HIZ,
+	HIZA7_LCDC, HIZA7_HIZ,
+	HIZA6_LCDC, HIZA6_HIZ,
+	HIZB1_VIO, HIZB1_HIZ,
+	HIZB0_VIO, HIZB0_HIZ,
+	HIZC15_IRQ7, HIZC15_HIZ,
+	HIZC14_IRQ6, HIZC14_HIZ,
+	HIZC13_IRQ5, HIZC13_HIZ,
+	HIZC12_IRQ4, HIZC12_HIZ,
+	HIZC11_IRQ3, HIZC11_HIZ,
+	HIZC10_IRQ2, HIZC10_HIZ,
+	HIZC9_IRQ1, HIZC9_HIZ,
+	HIZC8_IRQ0, HIZC8_HIZ,
+	MSELB9_VIO, MSELB9_VIO2,
+	MSELB8_RGB, MSELB8_SYS,
+	PINMUX_FUNCTION_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_IN_PD, PTA7_OUT),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_IN_PD),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_IN_PD, PTA5_OUT),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_IN_PD),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_IN_PD),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_IN_PD),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_IN_PD),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_IN_PD),
+
+	/* PTB */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT),
+
+	/* PTC */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT),
+
+	/* PTD */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_OUT, PTD6_IN, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_OUT, PTD5_IN, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_OUT, PTD4_IN, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_OUT, PTD3_IN, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_OUT, PTD2_IN, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_OUT, PTD1_IN, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_OUT),
+
+	/* PTE */
+	PINMUX_DATA(PTE7_DATA, PTE7_OUT, PTE7_IN, PTE7_IN_PD),
+	PINMUX_DATA(PTE6_DATA, PTE6_OUT, PTE6_IN, PTE6_IN_PD),
+	PINMUX_DATA(PTE5_DATA, PTE5_OUT, PTE5_IN, PTE5_IN_PD),
+	PINMUX_DATA(PTE4_DATA, PTE4_OUT, PTE4_IN, PTE4_IN_PD),
+	PINMUX_DATA(PTE1_DATA, PTE1_OUT, PTE1_IN, PTE1_IN_PD),
+	PINMUX_DATA(PTE0_DATA, PTE0_OUT, PTE0_IN, PTE0_IN_PD),
+
+	/* PTF */
+	PINMUX_DATA(PTF6_DATA, PTF6_OUT, PTF6_IN, PTF6_IN_PD),
+	PINMUX_DATA(PTF5_DATA, PTF5_OUT, PTF5_IN, PTF5_IN_PD),
+	PINMUX_DATA(PTF4_DATA, PTF4_OUT, PTF4_IN, PTF4_IN_PD),
+	PINMUX_DATA(PTF3_DATA, PTF3_OUT, PTF3_IN, PTF3_IN_PD),
+	PINMUX_DATA(PTF2_DATA, PTF2_OUT, PTF2_IN, PTF2_IN_PD),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_IN_PD),
+	PINMUX_DATA(PTF0_DATA, PTF0_OUT),
+
+	/* PTG */
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH */
+	PINMUX_DATA(PTH7_DATA, PTH7_OUT),
+	PINMUX_DATA(PTH6_DATA, PTH6_OUT, PTH6_IN, PTH6_IN_PD),
+	PINMUX_DATA(PTH5_DATA, PTH5_OUT, PTH5_IN, PTH5_IN_PD),
+	PINMUX_DATA(PTH4_DATA, PTH4_OUT),
+	PINMUX_DATA(PTH3_DATA, PTH3_OUT),
+	PINMUX_DATA(PTH2_DATA, PTH2_OUT),
+	PINMUX_DATA(PTH1_DATA, PTH1_OUT, PTH1_IN, PTH1_IN_PD),
+	PINMUX_DATA(PTH0_DATA, PTH0_OUT, PTH0_IN, PTH0_IN_PD),
+
+	/* PTJ */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ6_DATA, PTJ6_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_OUT, PTJ1_IN, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_OUT, PTJ0_IN, PTJ0_IN_PU),
+
+	/* PTK */
+	PINMUX_DATA(PTK6_DATA, PTK6_OUT, PTK6_IN, PTK6_IN_PD),
+	PINMUX_DATA(PTK5_DATA, PTK5_OUT, PTK5_IN, PTK5_IN_PD),
+	PINMUX_DATA(PTK4_DATA, PTK4_OUT, PTK4_IN, PTK4_IN_PD),
+	PINMUX_DATA(PTK3_DATA, PTK3_OUT, PTK3_IN, PTK3_IN_PD),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_IN_PD),
+	PINMUX_DATA(PTK1_DATA, PTK1_OUT),
+	PINMUX_DATA(PTK0_DATA, PTK0_OUT, PTK0_IN, PTK0_IN_PD),
+
+	/* PTL */
+	PINMUX_DATA(PTL7_DATA, PTL7_OUT, PTL7_IN, PTL7_IN_PD),
+	PINMUX_DATA(PTL6_DATA, PTL6_OUT, PTL6_IN, PTL6_IN_PD),
+	PINMUX_DATA(PTL5_DATA, PTL5_OUT, PTL5_IN, PTL5_IN_PD),
+	PINMUX_DATA(PTL4_DATA, PTL4_OUT, PTL4_IN, PTL4_IN_PD),
+	PINMUX_DATA(PTL3_DATA, PTL3_OUT, PTL3_IN, PTL3_IN_PD),
+	PINMUX_DATA(PTL2_DATA, PTL2_OUT, PTL2_IN, PTL2_IN_PD),
+	PINMUX_DATA(PTL1_DATA, PTL1_OUT, PTL1_IN, PTL1_IN_PD),
+	PINMUX_DATA(PTL0_DATA, PTL0_OUT, PTL0_IN, PTL0_IN_PD),
+
+	/* PTM */
+	PINMUX_DATA(PTM7_DATA, PTM7_OUT, PTM7_IN, PTM7_IN_PD),
+	PINMUX_DATA(PTM6_DATA, PTM6_OUT, PTM6_IN, PTM6_IN_PD),
+	PINMUX_DATA(PTM5_DATA, PTM5_OUT, PTM5_IN, PTM5_IN_PD),
+	PINMUX_DATA(PTM4_DATA, PTM4_OUT, PTM4_IN, PTM4_IN_PD),
+	PINMUX_DATA(PTM3_DATA, PTM3_OUT, PTM3_IN, PTM3_IN_PD),
+	PINMUX_DATA(PTM2_DATA, PTM2_OUT, PTM2_IN, PTM2_IN_PD),
+	PINMUX_DATA(PTM1_DATA, PTM1_OUT, PTM1_IN, PTM1_IN_PD),
+	PINMUX_DATA(PTM0_DATA, PTM0_OUT, PTM0_IN, PTM0_IN_PD),
+
+	/* PTN */
+	PINMUX_DATA(PTN7_DATA, PTN7_OUT, PTN7_IN),
+	PINMUX_DATA(PTN6_DATA, PTN6_OUT, PTN6_IN),
+	PINMUX_DATA(PTN5_DATA, PTN5_OUT, PTN5_IN),
+	PINMUX_DATA(PTN4_DATA, PTN4_OUT, PTN4_IN),
+	PINMUX_DATA(PTN3_DATA, PTN3_OUT, PTN3_IN),
+	PINMUX_DATA(PTN2_DATA, PTN2_OUT, PTN2_IN),
+	PINMUX_DATA(PTN1_DATA, PTN1_OUT, PTN1_IN),
+	PINMUX_DATA(PTN0_DATA, PTN0_OUT, PTN0_IN),
+
+	/* PTQ */
+	PINMUX_DATA(PTQ6_DATA, PTQ6_OUT),
+	PINMUX_DATA(PTQ5_DATA, PTQ5_OUT, PTQ5_IN, PTQ5_IN_PD),
+	PINMUX_DATA(PTQ4_DATA, PTQ4_OUT, PTQ4_IN, PTQ4_IN_PD),
+	PINMUX_DATA(PTQ3_DATA, PTQ3_OUT, PTQ3_IN, PTQ3_IN_PD),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN, PTQ2_IN_PD),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_OUT),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_OUT, PTQ0_IN, PTQ0_IN_PU),
+
+	/* PTR */
+	PINMUX_DATA(PTR4_DATA, PTR4_OUT),
+	PINMUX_DATA(PTR3_DATA, PTR3_OUT),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_OUT),
+	PINMUX_DATA(PTR0_DATA, PTR0_OUT),
+
+	/* PTS */
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_IN_PD),
+	PINMUX_DATA(PTS3_DATA, PTS3_OUT),
+	PINMUX_DATA(PTS2_DATA, PTS2_OUT, PTS2_IN, PTS2_IN_PD),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_IN_PD),
+	PINMUX_DATA(PTS0_DATA, PTS0_OUT),
+
+	/* PTT */
+	PINMUX_DATA(PTT4_DATA, PTT4_OUT, PTT4_IN, PTT4_IN_PD),
+	PINMUX_DATA(PTT3_DATA, PTT3_OUT, PTT3_IN, PTT3_IN_PD),
+	PINMUX_DATA(PTT2_DATA, PTT2_OUT, PTT2_IN, PTT2_IN_PD),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_IN_PD),
+	PINMUX_DATA(PTT0_DATA, PTT0_OUT),
+
+	/* PTU */
+	PINMUX_DATA(PTU4_DATA, PTU4_OUT, PTU4_IN, PTU4_IN_PD),
+	PINMUX_DATA(PTU3_DATA, PTU3_OUT, PTU3_IN, PTU3_IN_PD),
+	PINMUX_DATA(PTU2_DATA, PTU2_OUT, PTU2_IN, PTU2_IN_PD),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_IN_PD),
+	PINMUX_DATA(PTU0_DATA, PTU0_OUT, PTU0_IN, PTU0_IN_PD),
+
+	/* PTV */
+	PINMUX_DATA(PTV4_DATA, PTV4_OUT, PTV4_IN, PTV4_IN_PD),
+	PINMUX_DATA(PTV3_DATA, PTV3_OUT, PTV3_IN, PTV3_IN_PD),
+	PINMUX_DATA(PTV2_DATA, PTV2_OUT, PTV2_IN, PTV2_IN_PD),
+	PINMUX_DATA(PTV1_DATA, PTV1_OUT, PTV1_IN, PTV1_IN_PD),
+	PINMUX_DATA(PTV0_DATA, PTV0_OUT, PTV0_IN, PTV0_IN_PD),
+
+	/* PTW */
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_IN_PD),
+	PINMUX_DATA(PTW5_DATA, PTW5_OUT),
+	PINMUX_DATA(PTW4_DATA, PTW4_OUT, PTW4_IN, PTW4_IN_PD),
+	PINMUX_DATA(PTW3_DATA, PTW3_OUT, PTW3_IN, PTW3_IN_PD),
+	PINMUX_DATA(PTW2_DATA, PTW2_OUT, PTW2_IN, PTW2_IN_PD),
+	PINMUX_DATA(PTW1_DATA, PTW1_OUT, PTW1_IN, PTW1_IN_PD),
+	PINMUX_DATA(PTW0_DATA, PTW0_OUT, PTW0_IN, PTW0_IN_PD),
+
+	/* PTX */
+	PINMUX_DATA(PTX6_DATA, PTX6_OUT, PTX6_IN, PTX6_IN_PD),
+	PINMUX_DATA(PTX5_DATA, PTX5_OUT, PTX5_IN, PTX5_IN_PD),
+	PINMUX_DATA(PTX4_DATA, PTX4_OUT, PTX4_IN, PTX4_IN_PD),
+	PINMUX_DATA(PTX3_DATA, PTX3_OUT, PTX3_IN, PTX3_IN_PD),
+	PINMUX_DATA(PTX2_DATA, PTX2_OUT, PTX2_IN, PTX2_IN_PD),
+	PINMUX_DATA(PTX1_DATA, PTX1_OUT, PTX1_IN, PTX1_IN_PD),
+	PINMUX_DATA(PTX0_DATA, PTX0_OUT, PTX0_IN, PTX0_IN_PD),
+
+	/* PTY */
+	PINMUX_DATA(PTY5_DATA, PTY5_OUT, PTY5_IN, PTY5_IN_PU),
+	PINMUX_DATA(PTY4_DATA, PTY4_OUT, PTY4_IN, PTY4_IN_PU),
+	PINMUX_DATA(PTY3_DATA, PTY3_OUT, PTY3_IN, PTY3_IN_PU),
+	PINMUX_DATA(PTY2_DATA, PTY2_OUT, PTY2_IN, PTY2_IN_PU),
+	PINMUX_DATA(PTY1_DATA, PTY1_OUT),
+	PINMUX_DATA(PTY0_DATA, PTY0_OUT, PTY0_IN, PTY0_IN_PU),
+
+	/* PTZ */
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_IN_PU),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_IN_PU),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_IN_PU),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_IN_PU),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_IN_PU),
+
+	/* SCIF0 */
+	PINMUX_DATA(SCIF0_TXD_MARK, SCIF0_TXD),
+	PINMUX_DATA(SCIF0_RXD_MARK, SCIF0_RXD),
+	PINMUX_DATA(SCIF0_RTS_MARK, PSD7_SCIF0_RTS, SCIF0_RTS_SIUAOSPD),
+	PINMUX_DATA(SCIF0_CTS_MARK, PSD6_SCIF0_CTS, SCIF0_CTS_SIUAISPD),
+	PINMUX_DATA(SCIF0_SCK_MARK, PSD8_SCIF0_SCK, SCIF0_SCK_TPUTO),
+
+	/* SCIF1 */
+	PINMUX_DATA(SCIF1_TXD_MARK, PSD11_SCIF1, VIO_D5_SCIF1_TXD),
+	PINMUX_DATA(SCIF1_RXD_MARK, PSD11_SCIF1, VIO_D6_SCIF1_RXD),
+	PINMUX_DATA(SCIF1_RTS_MARK, PSD12_SCIF1, VIO_CLK_SCIF1_RTS),
+	PINMUX_DATA(SCIF1_CTS_MARK, PSD12_SCIF1, VIO_VD_SCIF1_CTS),
+	PINMUX_DATA(SCIF1_SCK_MARK, PSD11_SCIF1, VIO_D7_SCIF1_SCK),
+
+	/* SCIF2 */
+	PINMUX_DATA(SCIF2_TXD_MARK, PSD13_SCIF2, VIO_STEM_SCIF2_TXD),
+	PINMUX_DATA(SCIF2_RXD_MARK, PSD13_SCIF2, VIO_HD_SCIF2_RXD),
+	PINMUX_DATA(SCIF2_RTS_MARK, PSD13_SCIF2, VIO_CKO_SCIF2_RTS),
+	PINMUX_DATA(SCIF2_CTS_MARK, PSD13_SCIF2, VIO_FLD_SCIF2_CTS),
+	PINMUX_DATA(SCIF2_SCK_MARK, PSD13_SCIF2, VIO_STEX_SCIF2_SCK),
+
+	/* SIO */
+	PINMUX_DATA(SIOTXD_MARK, PSB15_SIOTXD, SIOTXD_SIUBOSLD),
+	PINMUX_DATA(SIORXD_MARK, PSB14_SIORXD, SIORXD_SIUBISLD),
+	PINMUX_DATA(SIOD_MARK, PSB13_SIOD, SIOD_SIUBILR),
+	PINMUX_DATA(SIOSTRB0_MARK, PSB12_SIOSTRB0, SIOSTRB0_SIUBIBT),
+	PINMUX_DATA(SIOSTRB1_MARK, PSB11_SIOSTRB1, SIOSTRB1_SIUBOLR),
+	PINMUX_DATA(SIOSCK_MARK, PSB10_SIOSCK, SIOSCK_SIUBOBT),
+	PINMUX_DATA(SIOMCK_MARK, PSD9_SIOMCK_SIUMCKB, PSB9_SIOMCK, PTF6),
+
+	/* CEU */
+	PINMUX_DATA(VIO_D15_MARK, PSC0_VIO, HIZA10_NAF, NAF7_VIO_D15),
+	PINMUX_DATA(VIO_D14_MARK, PSC0_VIO, HIZA10_NAF, NAF6_VIO_D14),
+	PINMUX_DATA(VIO_D13_MARK, PSC0_VIO, HIZA10_NAF, NAF5_VIO_D13),
+	PINMUX_DATA(VIO_D12_MARK, PSC0_VIO, HIZA10_NAF, NAF4_VIO_D12),
+	PINMUX_DATA(VIO_D11_MARK, PSC0_VIO, HIZA10_NAF, NAF3_VIO_D11),
+	PINMUX_DATA(VIO_D10_MARK, PSE2_VIO_D10, HIZB0_VIO, NAF2_VIO_D10),
+	PINMUX_DATA(VIO_D9_MARK, PSE1_VIO_D9, HIZB0_VIO, NAF1_VIO_D9),
+	PINMUX_DATA(VIO_D8_MARK, PSE0_VIO_D8, HIZB0_VIO, NAF0_VIO_D8),
+	PINMUX_DATA(VIO_D7_MARK, PSD11_VIO, VIO_D7_SCIF1_SCK),
+	PINMUX_DATA(VIO_D6_MARK, PSD11_VIO, VIO_D6_SCIF1_RXD),
+	PINMUX_DATA(VIO_D5_MARK, PSD11_VIO, VIO_D5_SCIF1_TXD),
+	PINMUX_DATA(VIO_D4_MARK, VIO_D4),
+	PINMUX_DATA(VIO_D3_MARK, VIO_D3),
+	PINMUX_DATA(VIO_D2_MARK, VIO_D2),
+	PINMUX_DATA(VIO_D1_MARK, VIO_D1),
+	PINMUX_DATA(VIO_D0_MARK, PSD10_VIO_D0, VIO_D0_LCDLCLK),
+	PINMUX_DATA(VIO_CLK_MARK, PSD12_VIO, MSELB9_VIO, VIO_CLK_SCIF1_RTS),
+	PINMUX_DATA(VIO_VD_MARK, PSD12_VIO, MSELB9_VIO, VIO_VD_SCIF1_CTS),
+	PINMUX_DATA(VIO_HD_MARK, PSD13_VIO, MSELB9_VIO, VIO_HD_SCIF2_RXD),
+	PINMUX_DATA(VIO_FLD_MARK, PSD13_VIO, HIZA9_VIO, VIO_FLD_SCIF2_CTS),
+	PINMUX_DATA(VIO_CKO_MARK, PSD13_VIO, HIZA9_VIO, VIO_CKO_SCIF2_RTS),
+	PINMUX_DATA(VIO_STEX_MARK, PSD13_VIO, HIZA9_VIO, VIO_STEX_SCIF2_SCK),
+	PINMUX_DATA(VIO_STEM_MARK, PSD13_VIO, HIZA9_VIO, VIO_STEM_SCIF2_TXD),
+	PINMUX_DATA(VIO_VD2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB0_VIO, FOE_VIO_VD2),
+	PINMUX_DATA(VIO_HD2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB1_VIO, HIZB1_VIO, FCE_VIO_HD2),
+	PINMUX_DATA(VIO_CLK2_MARK, PSE3_VIO, MSELB9_VIO2,
+		    HIZB1_VIO, FRB_VIO_CLK2),
+
+	/* LCDC */
+	PINMUX_DATA(LCDD23_MARK, HIZA8_LCDC, LCDD23),
+	PINMUX_DATA(LCDD22_MARK, HIZA8_LCDC, LCDD22),
+	PINMUX_DATA(LCDD21_MARK, HIZA8_LCDC, LCDD21),
+	PINMUX_DATA(LCDD20_MARK, HIZA8_LCDC, LCDD20),
+	PINMUX_DATA(LCDD19_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD19_DV_CLKI),
+	PINMUX_DATA(LCDD18_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD18_DV_CLK),
+	PINMUX_DATA(LCDD17_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC,
+		    LCDD17_DV_HSYNC),
+	PINMUX_DATA(LCDD16_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC,
+		    LCDD16_DV_VSYNC),
+	PINMUX_DATA(LCDD15_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD15_DV_D15),
+	PINMUX_DATA(LCDD14_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD14_DV_D14),
+	PINMUX_DATA(LCDD13_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD13_DV_D13),
+	PINMUX_DATA(LCDD12_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD12_DV_D12),
+	PINMUX_DATA(LCDD11_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD11_DV_D11),
+	PINMUX_DATA(LCDD10_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD10_DV_D10),
+	PINMUX_DATA(LCDD9_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD9_DV_D9),
+	PINMUX_DATA(LCDD8_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD8_DV_D8),
+	PINMUX_DATA(LCDD7_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD7_DV_D7),
+	PINMUX_DATA(LCDD6_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD6_DV_D6),
+	PINMUX_DATA(LCDD5_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD5_DV_D5),
+	PINMUX_DATA(LCDD4_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD4_DV_D4),
+	PINMUX_DATA(LCDD3_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD3_DV_D3),
+	PINMUX_DATA(LCDD2_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD2_DV_D2),
+	PINMUX_DATA(LCDD1_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD1_DV_D1),
+	PINMUX_DATA(LCDD0_MARK, PSD0_LCDD19_LCDD0, HIZA8_LCDC, LCDD0_DV_D0),
+	PINMUX_DATA(LCDLCLK_MARK, PSD10_LCDLCLK, VIO_D0_LCDLCLK),
+	/* Main LCD */
+	PINMUX_DATA(LCDDON_MARK, PSD2_LCDDON, HIZA7_LCDC, LCDDON_LCDDON2),
+	PINMUX_DATA(LCDVCPWC_MARK, PSD3_LCDVEPWC_LCDVCPWC,
+		    HIZA6_LCDC, LCDVCPWC_LCDVCPWC2),
+	PINMUX_DATA(LCDVEPWC_MARK, PSD3_LCDVEPWC_LCDVCPWC,
+		    HIZA6_LCDC, LCDVEPWC_LCDVEPWC2),
+	PINMUX_DATA(LCDVSYN_MARK, HIZA7_LCDC, LCDVSYN),
+	/* Main LCD - RGB Mode */
+	PINMUX_DATA(LCDDCK_MARK, MSELB8_RGB, HIZA8_LCDC, LCDDCK_LCDWR),
+	PINMUX_DATA(LCDHSYN_MARK, MSELB8_RGB, HIZA7_LCDC, LCDHSYN_LCDCS),
+	PINMUX_DATA(LCDDISP_MARK, MSELB8_RGB, HIZA7_LCDC, LCDDISP_LCDRS),
+	/* Main LCD - SYS Mode */
+	PINMUX_DATA(LCDRS_MARK, MSELB8_SYS, HIZA7_LCDC, LCDDISP_LCDRS),
+	PINMUX_DATA(LCDCS_MARK, MSELB8_SYS, HIZA7_LCDC, LCDHSYN_LCDCS),
+	PINMUX_DATA(LCDWR_MARK, MSELB8_SYS, HIZA8_LCDC, LCDDCK_LCDWR),
+	PINMUX_DATA(LCDRD_MARK, HIZA7_LCDC, LCDRD),
+	/* Sub LCD - SYS Mode */
+	PINMUX_DATA(LCDDON2_MARK, PSD2_LCDDON2, HIZA7_LCDC, LCDDON_LCDDON2),
+	PINMUX_DATA(LCDVCPWC2_MARK, PSD3_LCDVEPWC2_LCDVCPWC2,
+		    HIZA6_LCDC, LCDVCPWC_LCDVCPWC2),
+	PINMUX_DATA(LCDVEPWC2_MARK, PSD3_LCDVEPWC2_LCDVCPWC2,
+		    HIZA6_LCDC, LCDVEPWC_LCDVEPWC2),
+	PINMUX_DATA(LCDVSYN2_MARK, PSE12_LCDVSYN2, HIZA8_LCDC, LCDVSYN2_DACK),
+	PINMUX_DATA(LCDCS2_MARK, PSD5_LCDCS2, CS6B_CE1B_LCDCS2),
+
+	/* BSC */
+	PINMUX_DATA(IOIS16_MARK, IOIS16),
+	PINMUX_DATA(A25_MARK, A25),
+	PINMUX_DATA(A24_MARK, A24),
+	PINMUX_DATA(A23_MARK, A23),
+	PINMUX_DATA(A22_MARK, A22),
+	PINMUX_DATA(BS_MARK, PSA9_BS, IRQ4_BS),
+	PINMUX_DATA(CS6B_CE1B_MARK, PSD5_CS6B_CE1B, CS6B_CE1B_LCDCS2),
+	PINMUX_DATA(WAIT_MARK, WAIT),
+	PINMUX_DATA(CS6A_CE2B_MARK, CS6A_CE2B),
+
+	/* SBSC */
+	PINMUX_DATA(HPD63_MARK, HPD63),
+	PINMUX_DATA(HPD62_MARK, HPD62),
+	PINMUX_DATA(HPD61_MARK, HPD61),
+	PINMUX_DATA(HPD60_MARK, HPD60),
+	PINMUX_DATA(HPD59_MARK, HPD59),
+	PINMUX_DATA(HPD58_MARK, HPD58),
+	PINMUX_DATA(HPD57_MARK, HPD57),
+	PINMUX_DATA(HPD56_MARK, HPD56),
+	PINMUX_DATA(HPD55_MARK, HPD55),
+	PINMUX_DATA(HPD54_MARK, HPD54),
+	PINMUX_DATA(HPD53_MARK, HPD53),
+	PINMUX_DATA(HPD52_MARK, HPD52),
+	PINMUX_DATA(HPD51_MARK, HPD51),
+	PINMUX_DATA(HPD50_MARK, HPD50),
+	PINMUX_DATA(HPD49_MARK, HPD49),
+	PINMUX_DATA(HPD48_MARK, HPD48),
+	PINMUX_DATA(HPDQM7_MARK, HPDQM7),
+	PINMUX_DATA(HPDQM6_MARK, HPDQM6),
+	PINMUX_DATA(HPDQM5_MARK, HPDQM5),
+	PINMUX_DATA(HPDQM4_MARK, HPDQM4),
+
+	/* IRQ */
+	PINMUX_DATA(IRQ0_MARK, HIZC8_IRQ0, IRQ0),
+	PINMUX_DATA(IRQ1_MARK, HIZC9_IRQ1, IRQ1),
+	PINMUX_DATA(IRQ2_MARK, PSA4_IRQ2, HIZC10_IRQ2, IRQ2_SDHID2),
+	PINMUX_DATA(IRQ3_MARK, PSE15_SIOF0_MCK_IRQ3, PSB8_IRQ3,
+		    HIZC11_IRQ3, PTQ0),
+	PINMUX_DATA(IRQ4_MARK, PSA9_IRQ4, HIZC12_IRQ4, IRQ4_BS),
+	PINMUX_DATA(IRQ5_MARK, HIZC13_IRQ5, IRQ5),
+	PINMUX_DATA(IRQ6_MARK, PSA15_IRQ6, HIZC14_IRQ6, KEYIN0_IRQ6),
+	PINMUX_DATA(IRQ7_MARK, PSA14_IRQ7, HIZC15_IRQ7, KEYIN4_IRQ7),
+
+	/* SDHI */
+	PINMUX_DATA(SDHICD_MARK, SDHICD),
+	PINMUX_DATA(SDHIWP_MARK, SDHIWP),
+	PINMUX_DATA(SDHID3_MARK, SDHID3),
+	PINMUX_DATA(SDHID2_MARK, PSA4_SDHID2, IRQ2_SDHID2),
+	PINMUX_DATA(SDHID1_MARK, SDHID1),
+	PINMUX_DATA(SDHID0_MARK, SDHID0),
+	PINMUX_DATA(SDHICMD_MARK, SDHICMD),
+	PINMUX_DATA(SDHICLK_MARK, SDHICLK),
+
+	/* SIU - Port A */
+	PINMUX_DATA(SIUAOLR_MARK, PSC13_SIUAOLR, SIUAOLR_SIOF1_SYNC),
+	PINMUX_DATA(SIUAOBT_MARK, PSC14_SIUAOBT, SIUAOBT_SIOF1_SCK),
+	PINMUX_DATA(SIUAISLD_MARK, PSC15_SIUAISLD, SIUAISLD_SIOF1_RXD),
+	PINMUX_DATA(SIUAILR_MARK, PSC11_SIUAILR, SIUAILR_SIOF1_SS2),
+	PINMUX_DATA(SIUAIBT_MARK, PSC12_SIUAIBT, SIUAIBT_SIOF1_SS1),
+	PINMUX_DATA(SIUAOSLD_MARK, PSB0_SIUAOSLD, SIUAOSLD_SIOF1_TXD),
+	PINMUX_DATA(SIUMCKA_MARK, PSE11_SIUMCKA_SIOF1_MCK, PSB1_SIUMCKA, PTK0),
+	PINMUX_DATA(SIUFCKA_MARK, PSE11_SIUFCKA, PTK0),
+
+	/* SIU - Port B */
+	PINMUX_DATA(SIUBOLR_MARK, PSB11_SIUBOLR, SIOSTRB1_SIUBOLR),
+	PINMUX_DATA(SIUBOBT_MARK, PSB10_SIUBOBT, SIOSCK_SIUBOBT),
+	PINMUX_DATA(SIUBISLD_MARK, PSB14_SIUBISLD, SIORXD_SIUBISLD),
+	PINMUX_DATA(SIUBILR_MARK, PSB13_SIUBILR, SIOD_SIUBILR),
+	PINMUX_DATA(SIUBIBT_MARK, PSB12_SIUBIBT, SIOSTRB0_SIUBIBT),
+	PINMUX_DATA(SIUBOSLD_MARK, PSB15_SIUBOSLD, SIOTXD_SIUBOSLD),
+	PINMUX_DATA(SIUMCKB_MARK, PSD9_SIOMCK_SIUMCKB, PSB9_SIUMCKB, PTF6),
+	PINMUX_DATA(SIUFCKB_MARK, PSD9_SIUFCKB, PTF6),
+
+	/* AUD */
+	PINMUX_DATA(AUDSYNC_MARK, AUDSYNC),
+	PINMUX_DATA(AUDATA3_MARK, AUDATA3),
+	PINMUX_DATA(AUDATA2_MARK, AUDATA2),
+	PINMUX_DATA(AUDATA1_MARK, AUDATA1),
+	PINMUX_DATA(AUDATA0_MARK, AUDATA0),
+
+	/* DMAC */
+	PINMUX_DATA(DACK_MARK, PSE12_DACK, LCDVSYN2_DACK),
+	PINMUX_DATA(DREQ0_MARK, DREQ0),
+
+	/* VOU */
+	PINMUX_DATA(DV_CLKI_MARK, PSD0_DV, LCDD19_DV_CLKI),
+	PINMUX_DATA(DV_CLK_MARK, PSD0_DV, LCDD18_DV_CLK),
+	PINMUX_DATA(DV_HSYNC_MARK, PSD0_DV, LCDD17_DV_HSYNC),
+	PINMUX_DATA(DV_VSYNC_MARK, PSD0_DV, LCDD16_DV_VSYNC),
+	PINMUX_DATA(DV_D15_MARK, PSD0_DV, LCDD15_DV_D15),
+	PINMUX_DATA(DV_D14_MARK, PSD0_DV, LCDD14_DV_D14),
+	PINMUX_DATA(DV_D13_MARK, PSD0_DV, LCDD13_DV_D13),
+	PINMUX_DATA(DV_D12_MARK, PSD0_DV, LCDD12_DV_D12),
+	PINMUX_DATA(DV_D11_MARK, PSD0_DV, LCDD11_DV_D11),
+	PINMUX_DATA(DV_D10_MARK, PSD0_DV, LCDD10_DV_D10),
+	PINMUX_DATA(DV_D9_MARK, PSD0_DV, LCDD9_DV_D9),
+	PINMUX_DATA(DV_D8_MARK, PSD0_DV, LCDD8_DV_D8),
+	PINMUX_DATA(DV_D7_MARK, PSD0_DV, LCDD7_DV_D7),
+	PINMUX_DATA(DV_D6_MARK, PSD0_DV, LCDD6_DV_D6),
+	PINMUX_DATA(DV_D5_MARK, PSD0_DV, LCDD5_DV_D5),
+	PINMUX_DATA(DV_D4_MARK, PSD0_DV, LCDD4_DV_D4),
+	PINMUX_DATA(DV_D3_MARK, PSD0_DV, LCDD3_DV_D3),
+	PINMUX_DATA(DV_D2_MARK, PSD0_DV, LCDD2_DV_D2),
+	PINMUX_DATA(DV_D1_MARK, PSD0_DV, LCDD1_DV_D1),
+	PINMUX_DATA(DV_D0_MARK, PSD0_DV, LCDD0_DV_D0),
+
+	/* CPG */
+	PINMUX_DATA(STATUS0_MARK, STATUS0),
+	PINMUX_DATA(PDSTATUS_MARK, PDSTATUS),
+
+	/* SIOF0 */
+	PINMUX_DATA(SIOF0_MCK_MARK, PSE15_SIOF0_MCK_IRQ3, PSB8_SIOF0_MCK, PTQ0),
+	PINMUX_DATA(SIOF0_SCK_MARK, PSB5_SIOF0_SCK, SIOF0_SCK_TS_SCK),
+	PINMUX_DATA(SIOF0_SYNC_MARK, PSB4_SIOF0_SYNC, SIOF0_SYNC_TS_SDEN),
+	PINMUX_DATA(SIOF0_SS1_MARK, PSB3_SIOF0_SS1, SIOF0_SS1_TS_SPSYNC),
+	PINMUX_DATA(SIOF0_SS2_MARK, PSB2_SIOF0_SS2, SIOF0_SS2_SIM_RST),
+	PINMUX_DATA(SIOF0_TXD_MARK, PSE14_SIOF0_TXD_IRDA_OUT,
+		    PSB7_SIOF0_TXD, PTQ1),
+	PINMUX_DATA(SIOF0_RXD_MARK, PSE13_SIOF0_RXD_IRDA_IN,
+		    PSB6_SIOF0_RXD, PTQ2),
+
+	/* SIOF1 */
+	PINMUX_DATA(SIOF1_MCK_MARK, PSE11_SIUMCKA_SIOF1_MCK,
+		    PSB1_SIOF1_MCK, PTK0),
+	PINMUX_DATA(SIOF1_SCK_MARK, PSC14_SIOF1_SCK, SIUAOBT_SIOF1_SCK),
+	PINMUX_DATA(SIOF1_SYNC_MARK, PSC13_SIOF1_SYNC, SIUAOLR_SIOF1_SYNC),
+	PINMUX_DATA(SIOF1_SS1_MARK, PSC12_SIOF1_SS1, SIUAIBT_SIOF1_SS1),
+	PINMUX_DATA(SIOF1_SS2_MARK, PSC11_SIOF1_SS2, SIUAILR_SIOF1_SS2),
+	PINMUX_DATA(SIOF1_TXD_MARK, PSB0_SIOF1_TXD, SIUAOSLD_SIOF1_TXD),
+	PINMUX_DATA(SIOF1_RXD_MARK, PSC15_SIOF1_RXD, SIUAISLD_SIOF1_RXD),
+
+	/* SIM */
+	PINMUX_DATA(SIM_D_MARK, PSE15_SIM_D, PTQ0),
+	PINMUX_DATA(SIM_CLK_MARK, PSE14_SIM_CLK, PTQ1),
+	PINMUX_DATA(SIM_RST_MARK, PSB2_SIM_RST, SIOF0_SS2_SIM_RST),
+
+	/* TSIF */
+	PINMUX_DATA(TS_SDAT_MARK, PSE13_TS_SDAT, PTQ2),
+	PINMUX_DATA(TS_SCK_MARK, PSB5_TS_SCK, SIOF0_SCK_TS_SCK),
+	PINMUX_DATA(TS_SDEN_MARK, PSB4_TS_SDEN, SIOF0_SYNC_TS_SDEN),
+	PINMUX_DATA(TS_SPSYNC_MARK, PSB3_TS_SPSYNC, SIOF0_SS1_TS_SPSYNC),
+
+	/* IRDA */
+	PINMUX_DATA(IRDA_IN_MARK, PSE13_SIOF0_RXD_IRDA_IN, PSB6_IRDA_IN, PTQ2),
+	PINMUX_DATA(IRDA_OUT_MARK, PSE14_SIOF0_TXD_IRDA_OUT,
+		    PSB7_IRDA_OUT, PTQ1),
+
+	/* TPU */
+	PINMUX_DATA(TPUTO_MARK, PSD8_TPUTO, SCIF0_SCK_TPUTO),
+
+	/* FLCTL */
+	PINMUX_DATA(FCE_MARK, PSE3_FLCTL, FCE_VIO_HD2),
+	PINMUX_DATA(NAF7_MARK, PSC0_NAF, HIZA10_NAF, NAF7_VIO_D15),
+	PINMUX_DATA(NAF6_MARK, PSC0_NAF, HIZA10_NAF, NAF6_VIO_D14),
+	PINMUX_DATA(NAF5_MARK, PSC0_NAF, HIZA10_NAF, NAF5_VIO_D13),
+	PINMUX_DATA(NAF4_MARK, PSC0_NAF, HIZA10_NAF, NAF4_VIO_D12),
+	PINMUX_DATA(NAF3_MARK, PSC0_NAF, HIZA10_NAF, NAF3_VIO_D11),
+	PINMUX_DATA(NAF2_MARK, PSE2_NAF2, HIZB0_VIO, NAF2_VIO_D10),
+	PINMUX_DATA(NAF1_MARK, PSE1_NAF1, HIZB0_VIO, NAF1_VIO_D9),
+	PINMUX_DATA(NAF0_MARK, PSE0_NAF0, HIZB0_VIO, NAF0_VIO_D8),
+	PINMUX_DATA(FCDE_MARK, FCDE),
+	PINMUX_DATA(FOE_MARK, PSE3_FLCTL, HIZB0_VIO, FOE_VIO_VD2),
+	PINMUX_DATA(FSC_MARK, FSC),
+	PINMUX_DATA(FWE_MARK, FWE),
+	PINMUX_DATA(FRB_MARK, PSE3_FLCTL, FRB_VIO_CLK2),
+
+	/* KEYSC */
+	PINMUX_DATA(KEYIN0_MARK, PSA15_KEYIN0, HIZC14_IRQ6, KEYIN0_IRQ6),
+	PINMUX_DATA(KEYIN1_MARK, HIZA14_KEYSC, KEYIN1),
+	PINMUX_DATA(KEYIN2_MARK, HIZA14_KEYSC, KEYIN2),
+	PINMUX_DATA(KEYIN3_MARK, HIZA14_KEYSC, KEYIN3),
+	PINMUX_DATA(KEYIN4_MARK, PSA14_KEYIN4, HIZC15_IRQ7, KEYIN4_IRQ7),
+	PINMUX_DATA(KEYOUT0_MARK, HIZA14_KEYSC, KEYOUT0),
+	PINMUX_DATA(KEYOUT1_MARK, HIZA14_KEYSC, KEYOUT1),
+	PINMUX_DATA(KEYOUT2_MARK, HIZA14_KEYSC, KEYOUT2),
+	PINMUX_DATA(KEYOUT3_MARK, HIZA14_KEYSC, KEYOUT3),
+	PINMUX_DATA(KEYOUT4_IN6_MARK, HIZA14_KEYSC, KEYOUT4_IN6),
+	PINMUX_DATA(KEYOUT5_IN5_MARK, HIZA14_KEYSC, KEYOUT5_IN5),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = { /* per-GPIO-id entries */
+	/* PTA: pins 7-0 (port pins: each GPIO_PTxn is paired with PTxn_DATA) */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC: pins 7, 5-2, 0 (no PTC6 or PTC1 entry) */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE: pins 7-4, 1-0 (no PTE3 or PTE2 entry) */
+	PINMUX_GPIO(GPIO_PTE7, PTE7_DATA),
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF: pins 6-0 */
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ: pins 7-5, 1-0 (no PTJ4, PTJ3 or PTJ2 entry) */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK: pins 6-0 */
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN: pins 7-0 */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ: pins 6-0 */
+	PINMUX_GPIO(GPIO_PTQ6, PTQ6_DATA),
+	PINMUX_GPIO(GPIO_PTQ5, PTQ5_DATA),
+	PINMUX_GPIO(GPIO_PTQ4, PTQ4_DATA),
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV: pins 4-0 */
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW: pins 6-0 */
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX: pins 6-0 */
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY: pins 5-0 */
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ: pins 5-1 (no PTZ0 entry) */
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+
+	/* SCIF0 (function pins from here: each GPIO_FN_x is paired with x_MARK) */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD, SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD, SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RTS, SCIF0_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_CTS, SCIF0_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK, SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD, SCIF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD, SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RTS, SCIF1_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_CTS, SCIF1_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK, SCIF1_SCK_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_TXD, SCIF2_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_RXD, SCIF2_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_RTS, SCIF2_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_CTS, SCIF2_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_SCK, SCIF2_SCK_MARK),
+
+	/* SIO */
+	PINMUX_GPIO(GPIO_FN_SIOTXD, SIOTXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIORXD, SIORXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOD, SIOD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSTRB0, SIOSTRB0_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSTRB1, SIOSTRB1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOSCK, SIOSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOMCK, SIOMCK_MARK),
+
+	/* CEU */
+	PINMUX_GPIO(GPIO_FN_VIO_D15, VIO_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D14, VIO_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D13, VIO_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D12, VIO_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D11, VIO_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D10, VIO_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D9, VIO_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D8, VIO_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D7, VIO_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D6, VIO_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D5, VIO_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D4, VIO_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D3, VIO_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D2, VIO_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D1, VIO_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D0, VIO_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK, VIO_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD, VIO_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD, VIO_HD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_FLD, VIO_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CKO, VIO_CKO_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_STEX, VIO_STEX_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_STEM, VIO_STEM_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD2, VIO_VD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD2, VIO_HD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK2, VIO_CLK2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23, LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22, LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21, LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20, LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19, LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18, LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17, LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16, LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15, LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14, LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13, LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12, LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11, LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10, LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9, LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8, LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7, LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6, LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5, LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4, LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3, LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2, LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1, LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0, LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK, LCDLCLK_MARK),
+	/* Main LCD */
+	PINMUX_GPIO(GPIO_FN_LCDDON, LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC, LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC, LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN, LCDVSYN_MARK),
+	/* Main LCD - RGB Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDCK, LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN, LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP, LCDDISP_MARK),
+	/* Main LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDRS, LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS, LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR, LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD, LCDRD_MARK),
+	/* Sub LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDON2, LCDDON2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC2, LCDVCPWC2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC2, LCDVEPWC2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN2, LCDVSYN2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS2, LCDCS2_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B, CS6A_CE2B_MARK),
+
+	/* SBSC */
+	PINMUX_GPIO(GPIO_FN_HPD63, HPD63_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD62, HPD62_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD61, HPD61_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD60, HPD60_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD59, HPD59_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD58, HPD58_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD57, HPD57_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD56, HPD56_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD55, HPD55_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD54, HPD54_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD53, HPD53_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD52, HPD52_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD51, HPD51_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD50, HPD50_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD49, HPD49_MARK),
+	PINMUX_GPIO(GPIO_FN_HPD48, HPD48_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM7, HPDQM7_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM6, HPDQM6_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM5, HPDQM5_MARK),
+	PINMUX_GPIO(GPIO_FN_HPDQM4, HPDQM4_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_IRQ0, IRQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1, IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2, IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3, IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6, IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7, IRQ7_MARK),
+
+	/* SDHI */
+	PINMUX_GPIO(GPIO_FN_SDHICD, SDHICD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHIWP, SDHIWP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID3, SDHID3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID2, SDHID2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID1, SDHID1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHID0, SDHID0_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHICMD, SDHICMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHICLK, SDHICLK_MARK),
+
+	/* SIU - Port A */
+	PINMUX_GPIO(GPIO_FN_SIUAOLR, SIUAOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOBT, SIUAOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISLD, SIUAISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAILR, SIUAILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAIBT, SIUAIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOSLD, SIUAOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUMCKA, SIUMCKA_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUFCKA, SIUFCKA_MARK),
+
+	/* SIU - Port B */
+	PINMUX_GPIO(GPIO_FN_SIUBOLR, SIUBOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOBT, SIUBOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBISLD, SIUBISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBILR, SIUBILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBIBT, SIUBIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOSLD, SIUBOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUMCKB, SIUMCKB_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUFCKB, SIUFCKB_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK, DACK_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_CLKI, DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK, DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC, DV_HSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC, DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D15, DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14, DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13, DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12, DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11, DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10, DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9, DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8, DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7, DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6, DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5, DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4, DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3, DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2, DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1, DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0, DV_D0_MARK),
+
+	/* CPG */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_PDSTATUS, PDSTATUS_MARK),
+
+	/* SIOF0 */
+	PINMUX_GPIO(GPIO_FN_SIOF0_MCK, SIOF0_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SCK, SIOF0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SYNC, SIOF0_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SS1, SIOF0_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_SS2, SIOF0_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_TXD, SIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF0_RXD, SIOF0_RXD_MARK),
+
+	/* SIOF1 */
+	PINMUX_GPIO(GPIO_FN_SIOF1_MCK, SIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SCK, SIOF1_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SYNC, SIOF1_SYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SS1, SIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_SS2, SIOF1_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_TXD, SIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIOF1_RXD, SIOF1_RXD_MARK),
+
+	/* SIM */
+	PINMUX_GPIO(GPIO_FN_SIM_D, SIM_D_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_CLK, SIM_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIM_RST, SIM_RST_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TS_SDAT, TS_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SCK, TS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SDEN, TS_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TS_SPSYNC, TS_SPSYNC_MARK),
+
+	/* IRDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_IN, IRDA_IN_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT, IRDA_OUT_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO, TPUTO_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYIN0, KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1, KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2, KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3, KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4, KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0, KEYOUT0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1, KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2, KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3, KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6, KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5, KEYOUT5_IN5_MARK),
+};
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		VIO_D7_SCIF1_SCK, PTA7_OUT, PTA7_IN_PD, PTA7_IN,
+		VIO_D6_SCIF1_RXD, 0, PTA6_IN_PD, PTA6_IN,
+		VIO_D5_SCIF1_TXD, PTA5_OUT, PTA5_IN_PD, PTA5_IN,
+		VIO_D4, 0, PTA4_IN_PD, PTA4_IN,
+		VIO_D3, 0, PTA3_IN_PD, PTA3_IN,
+		VIO_D2, 0, PTA2_IN_PD, PTA2_IN,
+		VIO_D1, 0, PTA1_IN_PD, PTA1_IN,
+		VIO_D0_LCDLCLK, 0, PTA0_IN_PD, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		HPD55, PTB7_OUT, 0, PTB7_IN,
+		HPD54, PTB6_OUT, 0, PTB6_IN,
+		HPD53, PTB5_OUT, 0, PTB5_IN,
+		HPD52, PTB4_OUT, 0, PTB4_IN,
+		HPD51, PTB3_OUT, 0, PTB3_IN,
+		HPD50, PTB2_OUT, 0, PTB2_IN,
+		HPD49, PTB1_OUT, 0, PTB1_IN,
+		HPD48, PTB0_OUT, 0, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		0, 0, PTC7_IN_PU, PTC7_IN,
+		0, 0, 0, 0,
+		IOIS16, 0, PTC5_IN_PU, PTC5_IN,
+		HPDQM7, PTC4_OUT, 0, PTC4_IN,
+		HPDQM6, PTC3_OUT, 0, PTC3_IN,
+		HPDQM5, PTC2_OUT, 0, PTC2_IN,
+		0, 0, 0, 0,
+		HPDQM4, PTC0_OUT, 0, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		SDHICD, 0, PTD7_IN_PU, PTD7_IN,
+		SDHIWP, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		SDHID3, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		IRQ2_SDHID2, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		SDHID1, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		SDHID0, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		SDHICMD, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		SDHICLK, PTD0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		A25, PTE7_OUT, PTE7_IN_PD, PTE7_IN,
+		A24, PTE6_OUT, PTE6_IN_PD, PTE6_IN,
+		A23, PTE5_OUT, PTE5_IN_PD, PTE5_IN,
+		A22, PTE4_OUT, PTE4_IN_PD, PTE4_IN,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		IRQ5, PTE1_OUT, PTE1_IN_PD, PTE1_IN,
+		IRQ4_BS, PTE0_OUT, PTE0_IN_PD, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		0, 0, 0, 0,
+		PTF6, PTF6_OUT, PTF6_IN_PD, PTF6_IN,
+		SIOSCK_SIUBOBT, PTF5_OUT, PTF5_IN_PD, PTF5_IN,
+		SIOSTRB1_SIUBOLR, PTF4_OUT, PTF4_IN_PD, PTF4_IN,
+		SIOSTRB0_SIUBIBT, PTF3_OUT, PTF3_IN_PD, PTF3_IN,
+		SIOD_SIUBILR, PTF2_OUT, PTF2_IN_PD, PTF2_IN,
+		SIORXD_SIUBISLD, 0, PTF1_IN_PD, PTF1_IN,
+		SIOTXD_SIUBOSLD, PTF0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		AUDSYNC, PTG4_OUT, 0, 0,
+		AUDATA3, PTG3_OUT, 0, 0,
+		AUDATA2, PTG2_OUT, 0, 0,
+		AUDATA1, PTG1_OUT, 0, 0,
+		AUDATA0, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		LCDVCPWC_LCDVCPWC2, PTH7_OUT, 0, 0,
+		LCDVSYN2_DACK, PTH6_OUT, PTH6_IN_PD, PTH6_IN,
+		LCDVSYN, PTH5_OUT, PTH5_IN_PD, PTH5_IN,
+		LCDDISP_LCDRS, PTH4_OUT, 0, 0,
+		LCDHSYN_LCDCS, PTH3_OUT, 0, 0,
+		LCDDON_LCDDON2, PTH2_OUT, 0, 0,
+		LCDD17_DV_HSYNC, PTH1_OUT, PTH1_IN_PD, PTH1_IN,
+		LCDD16_DV_VSYNC, PTH0_OUT, PTH0_IN_PD, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		STATUS0, PTJ7_OUT, 0, 0,
+		0, PTJ6_OUT, 0, 0,
+		PDSTATUS, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		IRQ1, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		IRQ0, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		0, 0, 0, 0,
+		SIUAILR_SIOF1_SS2, PTK6_OUT, PTK6_IN_PD, PTK6_IN,
+		SIUAIBT_SIOF1_SS1, PTK5_OUT, PTK5_IN_PD, PTK5_IN,
+		SIUAOLR_SIOF1_SYNC, PTK4_OUT, PTK4_IN_PD, PTK4_IN,
+		SIUAOBT_SIOF1_SCK, PTK3_OUT, PTK3_IN_PD, PTK3_IN,
+		SIUAISLD_SIOF1_RXD, 0, PTK2_IN_PD, PTK2_IN,
+		SIUAOSLD_SIOF1_TXD, PTK1_OUT, 0, 0,
+		PTK0, PTK0_OUT, PTK0_IN_PD, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		LCDD15_DV_D15, PTL7_OUT, PTL7_IN_PD, PTL7_IN,
+		LCDD14_DV_D14, PTL6_OUT, PTL6_IN_PD, PTL6_IN,
+		LCDD13_DV_D13, PTL5_OUT, PTL5_IN_PD, PTL5_IN,
+		LCDD12_DV_D12, PTL4_OUT, PTL4_IN_PD, PTL4_IN,
+		LCDD11_DV_D11, PTL3_OUT, PTL3_IN_PD, PTL3_IN,
+		LCDD10_DV_D10, PTL2_OUT, PTL2_IN_PD, PTL2_IN,
+		LCDD9_DV_D9, PTL1_OUT, PTL1_IN_PD, PTL1_IN,
+		LCDD8_DV_D8, PTL0_OUT, PTL0_IN_PD, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		LCDD7_DV_D7, PTM7_OUT, PTM7_IN_PD, PTM7_IN,
+		LCDD6_DV_D6, PTM6_OUT, PTM6_IN_PD, PTM6_IN,
+		LCDD5_DV_D5, PTM5_OUT, PTM5_IN_PD, PTM5_IN,
+		LCDD4_DV_D4, PTM4_OUT, PTM4_IN_PD, PTM4_IN,
+		LCDD3_DV_D3, PTM3_OUT, PTM3_IN_PD, PTM3_IN,
+		LCDD2_DV_D2, PTM2_OUT, PTM2_IN_PD, PTM2_IN,
+		LCDD1_DV_D1, PTM1_OUT, PTM1_IN_PD, PTM1_IN,
+		LCDD0_DV_D0, PTM0_OUT, PTM0_IN_PD, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		HPD63, PTN7_OUT, 0, PTN7_IN,
+		HPD62, PTN6_OUT, 0, PTN6_IN,
+		HPD61, PTN5_OUT, 0, PTN5_IN,
+		HPD60, PTN4_OUT, 0, PTN4_IN,
+		HPD59, PTN3_OUT, 0, PTN3_IN,
+		HPD58, PTN2_OUT, 0, PTN2_IN,
+		HPD57, PTN1_OUT, 0, PTN1_IN,
+		HPD56, PTN0_OUT, 0, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		0, 0, 0, 0,
+		SIOF0_SS2_SIM_RST, PTQ6_OUT, 0, 0,
+		SIOF0_SS1_TS_SPSYNC, PTQ5_OUT, PTQ5_IN_PD, PTQ5_IN,
+		SIOF0_SYNC_TS_SDEN, PTQ4_OUT, PTQ4_IN_PD, PTQ4_IN,
+		SIOF0_SCK_TS_SCK, PTQ3_OUT, PTQ3_IN_PD, PTQ3_IN,
+		PTQ2, 0, PTQ2_IN_PD, PTQ2_IN,
+		PTQ1, PTQ1_OUT, 0, 0,
+		PTQ0, PTQ0_OUT, PTQ0_IN_PU, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		LCDRD, PTR4_OUT, 0, 0,
+		CS6B_CE1B_LCDCS2, PTR3_OUT, 0, 0,
+		WAIT, 0, PTR2_IN_PU, PTR2_IN,
+		LCDDCK_LCDWR, PTR1_OUT, 0, 0,
+		LCDVEPWC_LCDVEPWC2, PTR0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		SCIF0_CTS_SIUAISPD, 0, PTS4_IN_PD, PTS4_IN,
+		SCIF0_RTS_SIUAOSPD, PTS3_OUT, 0, 0,
+		SCIF0_SCK_TPUTO, PTS2_OUT, PTS2_IN_PD, PTS2_IN,
+		SCIF0_RXD, 0, PTS1_IN_PD, PTS1_IN,
+		SCIF0_TXD, PTS0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		FOE_VIO_VD2, PTT4_OUT, PTT4_IN_PD, PTT4_IN,
+		FWE, PTT3_OUT, PTT3_IN_PD, PTT3_IN,
+		FSC, PTT2_OUT, PTT2_IN_PD, PTT2_IN,
+		DREQ0, 0, PTT1_IN_PD, PTT1_IN,
+		FCDE, PTT0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		NAF2_VIO_D10, PTU4_OUT, PTU4_IN_PD, PTU4_IN,
+		NAF1_VIO_D9, PTU3_OUT, PTU3_IN_PD, PTU3_IN,
+		NAF0_VIO_D8, PTU2_OUT, PTU2_IN_PD, PTU2_IN,
+		FRB_VIO_CLK2, 0, PTU1_IN_PD, PTU1_IN,
+		FCE_VIO_HD2, PTU0_OUT, PTU0_IN_PD, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		NAF7_VIO_D15, PTV4_OUT, PTV4_IN_PD, PTV4_IN,
+		NAF6_VIO_D14, PTV3_OUT, PTV3_IN_PD, PTV3_IN,
+		NAF5_VIO_D13, PTV2_OUT, PTV2_IN_PD, PTV2_IN,
+		NAF4_VIO_D12, PTV1_OUT, PTV1_IN_PD, PTV1_IN,
+		NAF3_VIO_D11, PTV0_OUT, PTV0_IN_PD, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		0, 0, 0, 0,
+		VIO_FLD_SCIF2_CTS, 0, PTW6_IN_PD, PTW6_IN,
+		VIO_CKO_SCIF2_RTS, PTW5_OUT, 0, 0,
+		VIO_STEX_SCIF2_SCK, PTW4_OUT, PTW4_IN_PD, PTW4_IN,
+		VIO_STEM_SCIF2_TXD, PTW3_OUT, PTW3_IN_PD, PTW3_IN,
+		VIO_HD_SCIF2_RXD, PTW2_OUT, PTW2_IN_PD, PTW2_IN,
+		VIO_VD_SCIF1_CTS, PTW1_OUT, PTW1_IN_PD, PTW1_IN,
+		VIO_CLK_SCIF1_RTS, PTW0_OUT, PTW0_IN_PD, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		0, 0, 0, 0,
+		CS6A_CE2B, PTX6_OUT, PTX6_IN_PU, PTX6_IN,
+		LCDD23, PTX5_OUT, PTX5_IN_PD, PTX5_IN,
+		LCDD22, PTX4_OUT, PTX4_IN_PD, PTX4_IN,
+		LCDD21, PTX3_OUT, PTX3_IN_PD, PTX3_IN,
+		LCDD20, PTX2_OUT, PTX2_IN_PD, PTX2_IN,
+		LCDD19_DV_CLKI, PTX1_OUT, PTX1_IN_PD, PTX1_IN,
+		LCDD18_DV_CLK, PTX0_OUT, PTX0_IN_PD, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		KEYOUT5_IN5, PTY5_OUT, PTY5_IN_PU, PTY5_IN,
+		KEYOUT4_IN6, PTY4_OUT, PTY4_IN_PU, PTY4_IN,
+		KEYOUT3, PTY3_OUT, PTY3_IN_PU, PTY3_IN,
+		KEYOUT2, PTY2_OUT, PTY2_IN_PU, PTY2_IN,
+		KEYOUT1, PTY1_OUT, 0, 0,
+		KEYOUT0, PTY0_OUT, PTY0_IN_PU, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		KEYIN4_IRQ7, 0, PTZ5_IN_PU, PTZ5_IN,
+		KEYIN3, 0, PTZ4_IN_PU, PTZ4_IN,
+		KEYIN2, 0, PTZ3_IN_PU, PTZ3_IN,
+		KEYIN1, 0, PTZ2_IN_PU, PTZ2_IN,
+		KEYIN0_IRQ6, 0, PTZ1_IN_PU, PTZ1_IN,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 1) {
+		PSA15_KEYIN0, PSA15_IRQ6,
+		PSA14_KEYIN4, PSA14_IRQ7,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSA9_IRQ4, PSA9_BS,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSA4_IRQ2, PSA4_SDHID2,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 1) {
+		PSB15_SIOTXD, PSB15_SIUBOSLD,
+		PSB14_SIORXD, PSB14_SIUBISLD,
+		PSB13_SIOD, PSB13_SIUBILR,
+		PSB12_SIOSTRB0, PSB12_SIUBIBT,
+		PSB11_SIOSTRB1, PSB11_SIUBOLR,
+		PSB10_SIOSCK, PSB10_SIUBOBT,
+		PSB9_SIOMCK, PSB9_SIUMCKB,
+		PSB8_SIOF0_MCK, PSB8_IRQ3,
+		PSB7_SIOF0_TXD, PSB7_IRDA_OUT,
+		PSB6_SIOF0_RXD, PSB6_IRDA_IN,
+		PSB5_SIOF0_SCK, PSB5_TS_SCK,
+		PSB4_SIOF0_SYNC, PSB4_TS_SDEN,
+		PSB3_SIOF0_SS1, PSB3_TS_SPSYNC,
+		PSB2_SIOF0_SS2, PSB2_SIM_RST,
+		PSB1_SIUMCKA, PSB1_SIOF1_MCK,
+		PSB0_SIUAOSLD, PSB0_SIOF1_TXD }
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 1) {
+		PSC15_SIUAISLD, PSC15_SIOF1_RXD,
+		PSC14_SIUAOBT, PSC14_SIOF1_SCK,
+		PSC13_SIUAOLR, PSC13_SIOF1_SYNC,
+		PSC12_SIUAIBT, PSC12_SIOF1_SS1,
+		PSC11_SIUAILR, PSC11_SIOF1_SS2,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSC0_NAF, PSC0_VIO }
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 1) {
+		0, 0,
+		0, 0,
+		PSD13_VIO, PSD13_SCIF2,
+		PSD12_VIO, PSD12_SCIF1,
+		PSD11_VIO, PSD11_SCIF1,
+		PSD10_VIO_D0, PSD10_LCDLCLK,
+		PSD9_SIOMCK_SIUMCKB, PSD9_SIUFCKB,
+		PSD8_SCIF0_SCK, PSD8_TPUTO,
+		PSD7_SCIF0_RTS, PSD7_SIUAOSPD,
+		PSD6_SCIF0_CTS, PSD6_SIUAISPD,
+		PSD5_CS6B_CE1B, PSD5_LCDCS2,
+		0, 0,
+		PSD3_LCDVEPWC_LCDVCPWC, PSD3_LCDVEPWC2_LCDVCPWC2,
+		PSD2_LCDDON, PSD2_LCDDON2,
+		0, 0,
+		PSD0_LCDD19_LCDD0, PSD0_DV }
+	},
+	{ PINMUX_CFG_REG("PSELE", 0xa4050156, 16, 1) {
+		PSE15_SIOF0_MCK_IRQ3, PSE15_SIM_D,
+		PSE14_SIOF0_TXD_IRDA_OUT, PSE14_SIM_CLK,
+		PSE13_SIOF0_RXD_IRDA_IN, PSE13_TS_SDAT,
+		PSE12_LCDVSYN2, PSE12_DACK,
+		PSE11_SIUMCKA_SIOF1_MCK, PSE11_SIUFCKA,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		PSE3_FLCTL, PSE3_VIO,
+		PSE2_NAF2, PSE2_VIO_D10,
+		PSE1_NAF1, PSE1_VIO_D9,
+		PSE0_NAF0, PSE0_VIO_D8 }
+	},
+	{ PINMUX_CFG_REG("HIZCRA", 0xa4050158, 16, 1) {
+		0, 0,
+		HIZA14_KEYSC, HIZA14_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		HIZA10_NAF, HIZA10_HIZ,
+		HIZA9_VIO, HIZA9_HIZ,
+		HIZA8_LCDC, HIZA8_HIZ,
+		HIZA7_LCDC, HIZA7_HIZ,
+		HIZA6_LCDC, HIZA6_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("HIZCRB", 0xa405015a, 16, 1) {
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		HIZB1_VIO, HIZB1_HIZ,
+		HIZB0_VIO, HIZB0_HIZ }
+	},
+	{ PINMUX_CFG_REG("HIZCRC", 0xa405015c, 16, 1) {
+		HIZC15_IRQ7, HIZC15_HIZ,
+		HIZC14_IRQ6, HIZC14_HIZ,
+		HIZC13_IRQ5, HIZC13_HIZ,
+		HIZC12_IRQ4, HIZC12_HIZ,
+		HIZC11_IRQ3, HIZC11_HIZ,
+		HIZC10_IRQ2, HIZC10_HIZ,
+		HIZC9_IRQ1, HIZC9_HIZ,
+		HIZC8_IRQ0, HIZC8_HIZ,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{ PINMUX_CFG_REG("MSELCRB", 0xa4050182, 16, 1) {
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		MSELB9_VIO, MSELB9_VIO2,
+		MSELB8_RGB, MSELB8_SYS,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0,
+		0, 0 }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = { /* sh7722 port data registers PADR..PZDR */
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) { /* name, address, 8 (presumably bit width - confirm) */
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA, /* 8 ids per entry, pin 7 first */
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, 0, PTC5_DATA, PTC4_DATA, /* 0: no id listed for that position */
+		PTC3_DATA, PTC2_DATA, 0, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		0, 0, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		0, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0, 0, 0, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, 0,
+		0, 0, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		0, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		0, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		0, 0, 0, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		0, 0, 0, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) { /* addresses skip from 0x...13e to 0x...160 here */
+		0, 0, 0, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		0, 0, 0, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		0, 0, 0, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		0, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		0, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		0, PTY6_DATA, PTY5_DATA, PTY4_DATA, /* NOTE(review): PYCR above lists no PTY6 ids - confirm PTY6_DATA */
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		0, 0, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ }, /* empty last entry; presumably the end-of-table sentinel */
+};
+
+static struct pinmux_info sh7722_pinmux_info = { /* descriptor passed to register_pinmux() below */
+	.name = "sh7722_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END }, /* each pair: BEGIN/END markers of one enum id group */
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pd = { PINMUX_INPUT_PULLDOWN_BEGIN, PINMUX_INPUT_PULLDOWN_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7, /* first/last: presumably the inclusive GPIO number range - confirm */
+	.last_gpio = GPIO_FN_KEYOUT5_IN5,
+
+	.gpios = pinmux_gpios, /* tables defined earlier in this file */
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data), /* element count of pinmux_data[] */
+};
+
+static int __init plat_pinmux_setup(void) /* registers the sh7722 pinmux descriptor */
+{
+	return register_pinmux(&sh7722_pinmux_info); /* register_pinmux()'s result is returned unchanged */
+}
+
+arch_initcall(plat_pinmux_setup); /* hooks the setup function in through arch_initcall() */
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
new file mode 100644
index 00000000000..88bf5ecda84
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7723.c
@@ -0,0 +1,1909 @@
+/*
+ * SH7723 Pinmux
+ *
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <cpu/sh7723.h>
+
+enum { /* SH7723 pinmux ids; each group is bracketed by *_BEGIN / *_END markers */
+	PINMUX_RESERVED = 0, /* explicit 0; all later ids follow in declaration order */
+
+	PINMUX_DATA_BEGIN, /* one *_DATA id per listed port pin */
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE5_DATA, PTE4_DATA, PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+	PTG5_DATA, PTG4_DATA, PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ5_DATA, PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA, /* no PTJ6 or PTJ4 */
+	PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA, /* PTQ lists pins 3..0 only */
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+	PTS7_DATA, PTS6_DATA, PTS5_DATA, PTS4_DATA,
+	PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT5_DATA, PTT4_DATA, PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU5_DATA, PTU4_DATA, PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+	PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+	PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN, /* *_IN ids; no PTG ids, and PTJ only 3..0 */
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE5_IN, PTE4_IN, PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF7_IN, PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTH7_IN, PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK7_IN, PTK6_IN, PTK5_IN, PTK4_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ3_IN, PTQ2_IN, PTQ1_IN, PTQ0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+	PTS7_IN, PTS6_IN, PTS5_IN, PTS4_IN,
+	PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT5_IN, PTT4_IN, PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU5_IN, PTU4_IN, PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV7_IN, PTV6_IN, PTV5_IN, PTV4_IN,
+	PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW7_IN, PTW6_IN, PTW5_IN, PTW4_IN,
+	PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX7_IN, PTX6_IN, PTX5_IN, PTX4_IN,
+	PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY7_IN, PTY6_IN, PTY5_IN, PTY4_IN,
+	PTY3_IN, PTY2_IN, PTY1_IN, PTY0_IN,
+	PTZ7_IN, PTZ6_IN, PTZ5_IN, PTZ4_IN,
+	PTZ3_IN, PTZ2_IN, PTZ1_IN, PTZ0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN, /* only PTA4..0, PTB2, PTB1 and PTR2 have *_IN_PU ids */
+	PTA4_IN_PU, PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB2_IN_PU, PTB1_IN_PU,
+	PTR2_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN, /* *_OUT ids; no PTQ ids, and PTR3/PTR2 are absent */
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE5_OUT, PTE4_OUT, PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF7_OUT, PTF6_OUT, PTF5_OUT, PTF4_OUT,
+	PTF3_OUT, PTF2_OUT, PTF1_OUT, PTF0_OUT,
+	PTG5_OUT, PTG4_OUT, PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ5_OUT, PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK7_OUT, PTK6_OUT, PTK5_OUT, PTK4_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+	PTR1_OUT, PTR0_OUT,
+	PTS7_OUT, PTS6_OUT, PTS5_OUT, PTS4_OUT,
+	PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT5_OUT, PTT4_OUT, PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU5_OUT, PTU4_OUT, PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV7_OUT, PTV6_OUT, PTV5_OUT, PTV4_OUT,
+	PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW7_OUT, PTW6_OUT, PTW5_OUT, PTW4_OUT,
+	PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX7_OUT, PTX6_OUT, PTX5_OUT, PTX4_OUT,
+	PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY7_OUT, PTY6_OUT, PTY5_OUT, PTY4_OUT,
+	PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PTZ7_OUT, PTZ6_OUT, PTZ5_OUT, PTZ4_OUT,
+	PTZ3_OUT, PTZ2_OUT, PTZ1_OUT, PTZ0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN, /* per-pin *_FN ids first, then the PSxN_PSxM_* selector ids */
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE5_FN, PTE4_FN, PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF7_FN, PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+	PTG5_FN, PTG4_FN, PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH7_FN, PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ7_FN, PTJ5_FN, PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK7_FN, PTK6_FN, PTK5_FN, PTK4_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN,
+	PTL3_FN, PTL2_FN, PTL1_FN, PTL0_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTN7_FN, PTN6_FN, PTN5_FN, PTN4_FN,
+	PTN3_FN, PTN2_FN, PTN1_FN, PTN0_FN,
+	PTQ3_FN, PTQ2_FN, PTQ1_FN, PTQ0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+	PTS7_FN, PTS6_FN, PTS5_FN, PTS4_FN,
+	PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT5_FN, PTT4_FN, PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU5_FN, PTU4_FN, PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV7_FN, PTV6_FN, PTV5_FN, PTV4_FN,
+	PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+	PTW7_FN, PTW6_FN, PTW5_FN, PTW4_FN,
+	PTW3_FN, PTW2_FN, PTW1_FN, PTW0_FN,
+	PTX7_FN, PTX6_FN, PTX5_FN, PTX4_FN,
+	PTX3_FN, PTX2_FN, PTX1_FN, PTX0_FN,
+	PTY7_FN, PTY6_FN, PTY5_FN, PTY4_FN,
+	PTY3_FN, PTY2_FN, PTY1_FN, PTY0_FN,
+	PTZ7_FN, PTZ6_FN, PTZ5_FN, PTZ4_FN,
+	PTZ3_FN, PTZ2_FN, PTZ1_FN, PTZ0_FN,
+
+
+	PSA15_PSA14_FN1, PSA15_PSA14_FN2, /* presumably values of a PSELx bit pair - confirm */
+	PSA13_PSA12_FN1, PSA13_PSA12_FN2,
+	PSA11_PSA10_FN1, PSA11_PSA10_FN2,
+	PSA5_PSA4_FN1, PSA5_PSA4_FN2, PSA5_PSA4_FN3,
+	PSA3_PSA2_FN1, PSA3_PSA2_FN2,
+	PSB15_PSB14_FN1, PSB15_PSB14_FN2,
+	PSB13_PSB12_LCDC_RGB, PSB13_PSB12_LCDC_SYS,
+	PSB9_PSB8_FN1, PSB9_PSB8_FN2, PSB9_PSB8_FN3,
+	PSB7_PSB6_FN1, PSB7_PSB6_FN2,
+	PSB5_PSB4_FN1, PSB5_PSB4_FN2,
+	PSB3_PSB2_FN1, PSB3_PSB2_FN2,
+	PSC15_PSC14_FN1, PSC15_PSC14_FN2,
+	PSC13_PSC12_FN1, PSC13_PSC12_FN2,
+	PSC11_PSC10_FN1, PSC11_PSC10_FN2, PSC11_PSC10_FN3,
+	PSC9_PSC8_FN1, PSC9_PSC8_FN2,
+	PSC7_PSC6_FN1, PSC7_PSC6_FN2, PSC7_PSC6_FN3,
+	PSD15_PSD14_FN1, PSD15_PSD14_FN2,
+	PSD13_PSD12_FN1, PSD13_PSD12_FN2,
+	PSD11_PSD10_FN1, PSD11_PSD10_FN2, PSD11_PSD10_FN3,
+	PSD9_PSD8_FN1, PSD9_PSD8_FN2,
+	PSD7_PSD6_FN1, PSD7_PSD6_FN2,
+	PSD5_PSD4_FN1, PSD5_PSD4_FN2,
+	PSD3_PSD2_FN1, PSD3_PSD2_FN2,
+	PSD1_PSD0_FN1, PSD1_PSD0_FN2,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN, /* *_MARK ids: first argument of the function entries in pinmux_data[] */
+	SCIF0_PTT_TXD_MARK, SCIF0_PTT_RXD_MARK,
+	SCIF0_PTT_SCK_MARK, SCIF0_PTU_TXD_MARK,
+	SCIF0_PTU_RXD_MARK, SCIF0_PTU_SCK_MARK,
+
+	SCIF1_PTS_TXD_MARK, SCIF1_PTS_RXD_MARK,
+	SCIF1_PTS_SCK_MARK, SCIF1_PTV_TXD_MARK,
+	SCIF1_PTV_RXD_MARK, SCIF1_PTV_SCK_MARK,
+
+	SCIF2_PTT_TXD_MARK, SCIF2_PTT_RXD_MARK,
+	SCIF2_PTT_SCK_MARK, SCIF2_PTU_TXD_MARK,
+	SCIF2_PTU_RXD_MARK, SCIF2_PTU_SCK_MARK,
+
+	SCIF3_PTS_TXD_MARK, SCIF3_PTS_RXD_MARK,
+	SCIF3_PTS_SCK_MARK, SCIF3_PTS_RTS_MARK,
+	SCIF3_PTS_CTS_MARK, SCIF3_PTV_TXD_MARK,
+	SCIF3_PTV_RXD_MARK, SCIF3_PTV_SCK_MARK,
+	SCIF3_PTV_RTS_MARK, SCIF3_PTV_CTS_MARK,
+
+	SCIF4_PTE_TXD_MARK, SCIF4_PTE_RXD_MARK,
+	SCIF4_PTE_SCK_MARK, SCIF4_PTN_TXD_MARK,
+	SCIF4_PTN_RXD_MARK, SCIF4_PTN_SCK_MARK,
+
+	SCIF5_PTE_TXD_MARK, SCIF5_PTE_RXD_MARK,
+	SCIF5_PTE_SCK_MARK, SCIF5_PTN_TXD_MARK,
+	SCIF5_PTN_RXD_MARK, SCIF5_PTN_SCK_MARK,
+
+	VIO_D15_MARK, VIO_D14_MARK, VIO_D13_MARK, VIO_D12_MARK,
+	VIO_D11_MARK, VIO_D10_MARK, VIO_D9_MARK, VIO_D8_MARK,
+	VIO_D7_MARK, VIO_D6_MARK, VIO_D5_MARK, VIO_D4_MARK,
+	VIO_D3_MARK, VIO_D2_MARK, VIO_D1_MARK, VIO_D0_MARK,
+	VIO_FLD_MARK, VIO_CKO_MARK,
+	VIO_VD1_MARK, VIO_HD1_MARK, VIO_CLK1_MARK,
+	VIO_HD2_MARK, VIO_VD2_MARK, VIO_CLK2_MARK,
+
+	LCDD23_MARK, LCDD22_MARK, LCDD21_MARK, LCDD20_MARK,
+	LCDD19_MARK, LCDD18_MARK, LCDD17_MARK, LCDD16_MARK,
+	LCDD15_MARK, LCDD14_MARK, LCDD13_MARK, LCDD12_MARK,
+	LCDD11_MARK, LCDD10_MARK, LCDD9_MARK, LCDD8_MARK,
+	LCDD7_MARK, LCDD6_MARK, LCDD5_MARK, LCDD4_MARK,
+	LCDD3_MARK, LCDD2_MARK, LCDD1_MARK, LCDD0_MARK,
+	LCDDON_MARK, LCDVCPWC_MARK, LCDVEPWC_MARK,
+	LCDVSYN_MARK, LCDDCK_MARK, LCDHSYN_MARK, LCDDISP_MARK,
+	LCDRS_MARK, LCDCS_MARK, LCDWR_MARK, LCDRD_MARK,
+	LCDLCLK_PTR_MARK, LCDLCLK_PTW_MARK,
+
+	IRQ0_MARK, IRQ1_MARK, IRQ2_MARK, IRQ3_MARK,
+	IRQ4_MARK, IRQ5_MARK, IRQ6_MARK, IRQ7_MARK,
+
+	AUDATA3_MARK, AUDATA2_MARK, AUDATA1_MARK, AUDATA0_MARK,
+	AUDCK_MARK, AUDSYNC_MARK,
+
+	SDHI0CD_PTD_MARK, SDHI0WP_PTD_MARK,
+	SDHI0D3_PTD_MARK, SDHI0D2_PTD_MARK,
+	SDHI0D1_PTD_MARK, SDHI0D0_PTD_MARK,
+	SDHI0CMD_PTD_MARK, SDHI0CLK_PTD_MARK,
+
+	SDHI0CD_PTS_MARK, SDHI0WP_PTS_MARK,
+	SDHI0D3_PTS_MARK, SDHI0D2_PTS_MARK,
+	SDHI0D1_PTS_MARK, SDHI0D0_PTS_MARK,
+	SDHI0CMD_PTS_MARK, SDHI0CLK_PTS_MARK,
+
+	SDHI1CD_MARK, SDHI1WP_MARK, SDHI1D3_MARK, SDHI1D2_MARK,
+	SDHI1D1_MARK, SDHI1D0_MARK, SDHI1CMD_MARK, SDHI1CLK_MARK,
+
+	SIUAFCK_MARK, SIUAILR_MARK, SIUAIBT_MARK, SIUAISLD_MARK,
+	SIUAOLR_MARK, SIUAOBT_MARK, SIUAOSLD_MARK, SIUAMCK_MARK,
+	SIUAISPD_MARK, SIUAOSPD_MARK,
+
+	SIUBFCK_MARK, SIUBILR_MARK, SIUBIBT_MARK, SIUBISLD_MARK,
+	SIUBOLR_MARK, SIUBOBT_MARK, SIUBOSLD_MARK, SIUBMCK_MARK,
+
+	IRDA_IN_MARK, IRDA_OUT_MARK,
+
+	DV_CLKI_MARK, DV_CLK_MARK, DV_HSYNC_MARK, DV_VSYNC_MARK,
+	DV_D15_MARK, DV_D14_MARK, DV_D13_MARK, DV_D12_MARK,
+	DV_D11_MARK, DV_D10_MARK, DV_D9_MARK, DV_D8_MARK,
+	DV_D7_MARK, DV_D6_MARK, DV_D5_MARK, DV_D4_MARK,
+	DV_D3_MARK, DV_D2_MARK, DV_D1_MARK, DV_D0_MARK,
+
+	KEYIN0_MARK, KEYIN1_MARK, KEYIN2_MARK, KEYIN3_MARK, KEYIN4_MARK,
+	KEYOUT0_MARK, KEYOUT1_MARK, KEYOUT2_MARK, KEYOUT3_MARK,
+	KEYOUT4_IN6_MARK, KEYOUT5_IN5_MARK,
+
+	MSIOF0_PTF_TXD_MARK, MSIOF0_PTF_RXD_MARK, MSIOF0_PTF_MCK_MARK,
+	MSIOF0_PTF_TSYNC_MARK, MSIOF0_PTF_TSCK_MARK, MSIOF0_PTF_RSYNC_MARK,
+	MSIOF0_PTF_RSCK_MARK, MSIOF0_PTF_SS1_MARK, MSIOF0_PTF_SS2_MARK,
+
+	MSIOF0_PTT_TXD_MARK, MSIOF0_PTT_RXD_MARK, MSIOF0_PTX_MCK_MARK, /* NOTE(review): PTX among PTT names - confirm intended */
+	MSIOF0_PTT_TSYNC_MARK, MSIOF0_PTT_TSCK_MARK, MSIOF0_PTT_RSYNC_MARK,
+	MSIOF0_PTT_RSCK_MARK, MSIOF0_PTT_SS1_MARK, MSIOF0_PTT_SS2_MARK,
+
+	MSIOF1_TXD_MARK, MSIOF1_RXD_MARK, MSIOF1_MCK_MARK,
+	MSIOF1_TSYNC_MARK, MSIOF1_TSCK_MARK, MSIOF1_RSYNC_MARK,
+	MSIOF1_RSCK_MARK, MSIOF1_SS1_MARK, MSIOF1_SS2_MARK,
+
+	TS0_SDAT_MARK, TS0_SCK_MARK, TS0_SDEN_MARK, TS0_SPSYNC_MARK,
+
+	FCE_MARK, NAF7_MARK, NAF6_MARK, NAF5_MARK, NAF4_MARK,
+	NAF3_MARK, NAF2_MARK, NAF1_MARK, NAF0_MARK, FCDE_MARK,
+	FOE_MARK, FSC_MARK, FWE_MARK, FRB_MARK,
+
+	DACK1_MARK, DREQ1_MARK, DACK0_MARK, DREQ0_MARK,
+
+	AN3_MARK, AN2_MARK, AN1_MARK, AN0_MARK, ADTRG_MARK,
+
+	STATUS0_MARK, PDSTATUS_MARK,
+
+	TPUTO3_MARK, TPUTO2_MARK, TPUTO1_MARK, TPUTO0_MARK,
+
+	D31_MARK, D30_MARK, D29_MARK, D28_MARK,
+	D27_MARK, D26_MARK, D25_MARK, D24_MARK,
+	D23_MARK, D22_MARK, D21_MARK, D20_MARK,
+	D19_MARK, D18_MARK, D17_MARK, D16_MARK,
+	IOIS16_MARK, WAIT_MARK, BS_MARK,
+	A25_MARK, A24_MARK, A23_MARK, A22_MARK,
+	CS6B_CE1B_MARK, CS6A_CE2B_MARK,
+	CS5B_CE1A_MARK, CS5A_CE2A_MARK,
+	WE3_ICIOWR_MARK, WE2_ICIORD_MARK,
+
+	IDED15_MARK, IDED14_MARK, IDED13_MARK, IDED12_MARK,
+	IDED11_MARK, IDED10_MARK, IDED9_MARK, IDED8_MARK,
+	IDED7_MARK, IDED6_MARK, IDED5_MARK, IDED4_MARK,
+	IDED3_MARK, IDED2_MARK, IDED1_MARK, IDED0_MARK,
+	DIRECTION_MARK, EXBUF_ENB_MARK, IDERST_MARK, IODACK_MARK,
+	IODREQ_MARK, IDEIORDY_MARK, IDEINT_MARK, IDEIOWR_MARK,
+	IDEIORD_MARK, IDECS1_MARK, IDECS0_MARK, IDEA2_MARK,
+	IDEA1_MARK, IDEA0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT),
+
+	/* PTE GPIO */
+	PINMUX_DATA(PTE5_DATA, PTE5_IN, PTE5_OUT),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT),
+
+	/* PTF GPIO */
+	PINMUX_DATA(PTF7_DATA, PTF7_IN, PTF7_OUT),
+	PINMUX_DATA(PTF6_DATA, PTF6_IN, PTF6_OUT),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN, PTF5_OUT),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN, PTF4_OUT),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN, PTF3_OUT),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN, PTF2_OUT),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_OUT),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG5_DATA, PTG5_OUT),
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH7_DATA, PTH7_IN, PTH7_OUT),
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK7_DATA, PTK7_IN, PTK7_OUT),
+	PINMUX_DATA(PTK6_DATA, PTK6_IN, PTK6_OUT),
+	PINMUX_DATA(PTK5_DATA, PTK5_IN, PTK5_OUT),
+	PINMUX_DATA(PTK4_DATA, PTK4_IN, PTK4_OUT),
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT),
+	PINMUX_DATA(PTL2_DATA, PTL2_IN, PTL2_OUT),
+	PINMUX_DATA(PTL1_DATA, PTL1_IN, PTL1_OUT),
+	PINMUX_DATA(PTL0_DATA, PTL0_IN, PTL0_OUT),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT),
+
+	/* PTN GPIO */
+	PINMUX_DATA(PTN7_DATA, PTN7_IN, PTN7_OUT),
+	PINMUX_DATA(PTN6_DATA, PTN6_IN, PTN6_OUT),
+	PINMUX_DATA(PTN5_DATA, PTN5_IN, PTN5_OUT),
+	PINMUX_DATA(PTN4_DATA, PTN4_IN, PTN4_OUT),
+	PINMUX_DATA(PTN3_DATA, PTN3_IN, PTN3_OUT),
+	PINMUX_DATA(PTN2_DATA, PTN2_IN, PTN2_OUT),
+	PINMUX_DATA(PTN1_DATA, PTN1_IN, PTN1_OUT),
+	PINMUX_DATA(PTN0_DATA, PTN0_IN, PTN0_OUT),
+
+	/* PTQ GPIO */
+	PINMUX_DATA(PTQ3_DATA, PTQ3_IN),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_IN),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_IN),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS7_DATA, PTS7_IN, PTS7_OUT),
+	PINMUX_DATA(PTS6_DATA, PTS6_IN, PTS6_OUT),
+	PINMUX_DATA(PTS5_DATA, PTS5_IN, PTS5_OUT),
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT5_DATA, PTT5_IN, PTT5_OUT),
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU5_DATA, PTU5_IN, PTU5_OUT),
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV7_DATA, PTV7_IN, PTV7_OUT),
+	PINMUX_DATA(PTV6_DATA, PTV6_IN, PTV6_OUT),
+	PINMUX_DATA(PTV5_DATA, PTV5_IN, PTV5_OUT),
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT),
+
+	/* PTW GPIO */
+	PINMUX_DATA(PTW7_DATA, PTW7_IN, PTW7_OUT),
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_OUT),
+	PINMUX_DATA(PTW5_DATA, PTW5_IN, PTW5_OUT),
+	PINMUX_DATA(PTW4_DATA, PTW4_IN, PTW4_OUT),
+	PINMUX_DATA(PTW3_DATA, PTW3_IN, PTW3_OUT),
+	PINMUX_DATA(PTW2_DATA, PTW2_IN, PTW2_OUT),
+	PINMUX_DATA(PTW1_DATA, PTW1_IN, PTW1_OUT),
+	PINMUX_DATA(PTW0_DATA, PTW0_IN, PTW0_OUT),
+
+	/* PTX GPIO */
+	PINMUX_DATA(PTX7_DATA, PTX7_IN, PTX7_OUT),
+	PINMUX_DATA(PTX6_DATA, PTX6_IN, PTX6_OUT),
+	PINMUX_DATA(PTX5_DATA, PTX5_IN, PTX5_OUT),
+	PINMUX_DATA(PTX4_DATA, PTX4_IN, PTX4_OUT),
+	PINMUX_DATA(PTX3_DATA, PTX3_IN, PTX3_OUT),
+	PINMUX_DATA(PTX2_DATA, PTX2_IN, PTX2_OUT),
+	PINMUX_DATA(PTX1_DATA, PTX1_IN, PTX1_OUT),
+	PINMUX_DATA(PTX0_DATA, PTX0_IN, PTX0_OUT),
+
+	/* PTY GPIO */
+	PINMUX_DATA(PTY7_DATA, PTY7_IN, PTY7_OUT),
+	PINMUX_DATA(PTY6_DATA, PTY6_IN, PTY6_OUT),
+	PINMUX_DATA(PTY5_DATA, PTY5_IN, PTY5_OUT),
+	PINMUX_DATA(PTY4_DATA, PTY4_IN, PTY4_OUT),
+	PINMUX_DATA(PTY3_DATA, PTY3_IN, PTY3_OUT),
+	PINMUX_DATA(PTY2_DATA, PTY2_IN, PTY2_OUT),
+	PINMUX_DATA(PTY1_DATA, PTY1_IN, PTY1_OUT),
+	PINMUX_DATA(PTY0_DATA, PTY0_IN, PTY0_OUT),
+
+	/* PTZ GPIO */
+	PINMUX_DATA(PTZ7_DATA, PTZ7_IN, PTZ7_OUT),
+	PINMUX_DATA(PTZ6_DATA, PTZ6_IN, PTZ6_OUT),
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_OUT),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_OUT),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_OUT),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_OUT),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_OUT),
+	PINMUX_DATA(PTZ0_DATA, PTZ0_IN, PTZ0_OUT),
+
+	/* PTA FN */
+	PINMUX_DATA(D23_MARK, PSA15_PSA14_FN1, PTA7_FN),
+	PINMUX_DATA(KEYOUT2_MARK, PSA15_PSA14_FN2, PTA7_FN),
+	PINMUX_DATA(D22_MARK, PSA15_PSA14_FN1, PTA6_FN),
+	PINMUX_DATA(KEYOUT1_MARK, PSA15_PSA14_FN2, PTA6_FN),
+	PINMUX_DATA(D21_MARK, PSA15_PSA14_FN1, PTA5_FN),
+	PINMUX_DATA(KEYOUT0_MARK, PSA15_PSA14_FN2, PTA5_FN),
+	PINMUX_DATA(D20_MARK, PSA15_PSA14_FN1, PTA4_FN),
+	PINMUX_DATA(KEYIN4_MARK, PSA15_PSA14_FN2, PTA4_FN),
+	PINMUX_DATA(D19_MARK, PSA15_PSA14_FN1, PTA3_FN),
+	PINMUX_DATA(KEYIN3_MARK, PSA15_PSA14_FN2, PTA3_FN),
+	PINMUX_DATA(D18_MARK, PSA15_PSA14_FN1, PTA2_FN),
+	PINMUX_DATA(KEYIN2_MARK, PSA15_PSA14_FN2, PTA2_FN),
+	PINMUX_DATA(D17_MARK, PSA15_PSA14_FN1, PTA1_FN),
+	PINMUX_DATA(KEYIN1_MARK, PSA15_PSA14_FN2, PTA1_FN),
+	PINMUX_DATA(D16_MARK, PSA15_PSA14_FN1, PTA0_FN),
+	PINMUX_DATA(KEYIN0_MARK, PSA15_PSA14_FN2, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK, PTB7_FN),
+	PINMUX_DATA(D30_MARK, PTB6_FN),
+	PINMUX_DATA(D29_MARK, PTB5_FN),
+	PINMUX_DATA(D28_MARK, PTB4_FN),
+	PINMUX_DATA(D27_MARK, PTB3_FN),
+	PINMUX_DATA(D26_MARK, PSA15_PSA14_FN1, PTB2_FN),
+	PINMUX_DATA(KEYOUT5_IN5_MARK, PSA15_PSA14_FN2, PTB2_FN),
+	PINMUX_DATA(D25_MARK, PSA15_PSA14_FN1, PTB1_FN),
+	PINMUX_DATA(KEYOUT4_IN6_MARK, PSA15_PSA14_FN2, PTB1_FN),
+	PINMUX_DATA(D24_MARK, PSA15_PSA14_FN1, PTB0_FN),
+	PINMUX_DATA(KEYOUT3_MARK, PSA15_PSA14_FN2, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(IDED15_MARK, PSA11_PSA10_FN1, PTC7_FN),
+	PINMUX_DATA(SDHI1CD_MARK, PSA11_PSA10_FN2, PTC7_FN),
+	PINMUX_DATA(IDED14_MARK, PSA11_PSA10_FN1, PTC6_FN),
+	PINMUX_DATA(SDHI1WP_MARK, PSA11_PSA10_FN2, PTC6_FN),
+	PINMUX_DATA(IDED13_MARK, PSA11_PSA10_FN1, PTC5_FN),
+	PINMUX_DATA(SDHI1D3_MARK, PSA11_PSA10_FN2, PTC5_FN),
+	PINMUX_DATA(IDED12_MARK, PSA11_PSA10_FN1, PTC4_FN),
+	PINMUX_DATA(SDHI1D2_MARK, PSA11_PSA10_FN2, PTC4_FN),
+	PINMUX_DATA(IDED11_MARK, PSA11_PSA10_FN1, PTC3_FN),
+	PINMUX_DATA(SDHI1D1_MARK, PSA11_PSA10_FN2, PTC3_FN),
+	PINMUX_DATA(IDED10_MARK, PSA11_PSA10_FN1, PTC2_FN),
+	PINMUX_DATA(SDHI1D0_MARK, PSA11_PSA10_FN2, PTC2_FN),
+	PINMUX_DATA(IDED9_MARK, PSA11_PSA10_FN1, PTC1_FN),
+	PINMUX_DATA(SDHI1CMD_MARK, PSA11_PSA10_FN2, PTC1_FN),
+	PINMUX_DATA(IDED8_MARK, PSA11_PSA10_FN1, PTC0_FN),
+	PINMUX_DATA(SDHI1CLK_MARK, PSA11_PSA10_FN2, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(IDED7_MARK, PSA11_PSA10_FN1, PTD7_FN),
+	PINMUX_DATA(SDHI0CD_PTD_MARK, PSA11_PSA10_FN2, PTD7_FN),
+	PINMUX_DATA(IDED6_MARK, PSA11_PSA10_FN1, PTD6_FN),
+	PINMUX_DATA(SDHI0WP_PTD_MARK, PSA11_PSA10_FN2, PTD6_FN),
+	PINMUX_DATA(IDED5_MARK, PSA11_PSA10_FN1, PTD5_FN),
+	PINMUX_DATA(SDHI0D3_PTD_MARK, PSA11_PSA10_FN2, PTD5_FN),
+	PINMUX_DATA(IDED4_MARK, PSA11_PSA10_FN1, PTD4_FN),
+	PINMUX_DATA(SDHI0D2_PTD_MARK, PSA11_PSA10_FN2, PTD4_FN),
+	PINMUX_DATA(IDED3_MARK, PSA11_PSA10_FN1, PTD3_FN),
+	PINMUX_DATA(SDHI0D1_PTD_MARK, PSA11_PSA10_FN2, PTD3_FN),
+	PINMUX_DATA(IDED2_MARK, PSA11_PSA10_FN1, PTD2_FN),
+	PINMUX_DATA(SDHI0D0_PTD_MARK, PSA11_PSA10_FN2, PTD2_FN),
+	PINMUX_DATA(IDED1_MARK, PSA11_PSA10_FN1, PTD1_FN),
+	PINMUX_DATA(SDHI0CMD_PTD_MARK, PSA11_PSA10_FN2, PTD1_FN),
+	PINMUX_DATA(IDED0_MARK, PSA11_PSA10_FN1, PTD0_FN),
+	PINMUX_DATA(SDHI0CLK_PTD_MARK, PSA11_PSA10_FN2, PTD0_FN),
+
+	/* PTE FN */
+	PINMUX_DATA(DIRECTION_MARK, PSA11_PSA10_FN1, PTE5_FN),
+	PINMUX_DATA(SCIF5_PTE_SCK_MARK, PSA11_PSA10_FN2, PTE5_FN),
+	PINMUX_DATA(EXBUF_ENB_MARK, PSA11_PSA10_FN1, PTE4_FN),
+	PINMUX_DATA(SCIF5_PTE_RXD_MARK, PSA11_PSA10_FN2, PTE4_FN),
+	PINMUX_DATA(IDERST_MARK, PSA11_PSA10_FN1, PTE3_FN),
+	PINMUX_DATA(SCIF5_PTE_TXD_MARK, PSA11_PSA10_FN2, PTE3_FN),
+	PINMUX_DATA(IODACK_MARK, PSA11_PSA10_FN1, PTE2_FN),
+	PINMUX_DATA(SCIF4_PTE_SCK_MARK, PSA11_PSA10_FN2, PTE2_FN),
+	PINMUX_DATA(IODREQ_MARK, PSA11_PSA10_FN1, PTE1_FN),
+	PINMUX_DATA(SCIF4_PTE_RXD_MARK, PSA11_PSA10_FN2, PTE1_FN),
+	PINMUX_DATA(IDEIORDY_MARK, PSA11_PSA10_FN1, PTE0_FN),
+	PINMUX_DATA(SCIF4_PTE_TXD_MARK, PSA11_PSA10_FN2, PTE0_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(IDEINT_MARK, PTF7_FN),
+	PINMUX_DATA(IDEIOWR_MARK, PSA5_PSA4_FN1, PTF6_FN),
+	PINMUX_DATA(MSIOF0_PTF_SS2_MARK, PSA5_PSA4_FN2, PTF6_FN),
+	PINMUX_DATA(MSIOF0_PTF_RSYNC_MARK, PSA5_PSA4_FN3, PTF6_FN),
+	PINMUX_DATA(IDEIORD_MARK, PSA5_PSA4_FN1, PTF5_FN),
+	PINMUX_DATA(MSIOF0_PTF_SS1_MARK, PSA5_PSA4_FN2, PTF5_FN),
+	PINMUX_DATA(MSIOF0_PTF_RSCK_MARK, PSA5_PSA4_FN3, PTF5_FN),
+	PINMUX_DATA(IDECS1_MARK, PSA11_PSA10_FN1, PTF4_FN),
+	PINMUX_DATA(MSIOF0_PTF_TSYNC_MARK, PSA11_PSA10_FN2, PTF4_FN),
+	PINMUX_DATA(IDECS0_MARK, PSA11_PSA10_FN1, PTF3_FN),
+	PINMUX_DATA(MSIOF0_PTF_TSCK_MARK, PSA11_PSA10_FN2, PTF3_FN),
+	PINMUX_DATA(IDEA2_MARK, PSA11_PSA10_FN1, PTF2_FN),
+	PINMUX_DATA(MSIOF0_PTF_RXD_MARK, PSA11_PSA10_FN2, PTF2_FN),
+	PINMUX_DATA(IDEA1_MARK, PSA11_PSA10_FN1, PTF1_FN),
+	PINMUX_DATA(MSIOF0_PTF_TXD_MARK, PSA11_PSA10_FN2, PTF1_FN),
+	PINMUX_DATA(IDEA0_MARK, PSA11_PSA10_FN1, PTF0_FN),
+	PINMUX_DATA(MSIOF0_PTF_MCK_MARK, PSA11_PSA10_FN2, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(AUDCK_MARK, PTG5_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTG4_FN),
+	PINMUX_DATA(AUDATA3_MARK, PSA3_PSA2_FN1, PTG3_FN),
+	PINMUX_DATA(TPUTO3_MARK, PSA3_PSA2_FN2, PTG3_FN),
+	PINMUX_DATA(AUDATA2_MARK, PSA3_PSA2_FN1, PTG2_FN),
+	PINMUX_DATA(TPUTO2_MARK, PSA3_PSA2_FN2, PTG2_FN),
+	PINMUX_DATA(AUDATA1_MARK, PSA3_PSA2_FN1, PTG1_FN),
+	PINMUX_DATA(TPUTO1_MARK, PSA3_PSA2_FN2, PTG1_FN),
+	PINMUX_DATA(AUDATA0_MARK, PSA3_PSA2_FN1, PTG0_FN),
+	PINMUX_DATA(TPUTO0_MARK, PSA3_PSA2_FN2, PTG0_FN),
+
+	/* PTH FN */
+	PINMUX_DATA(LCDVCPWC_MARK, PTH7_FN),
+	PINMUX_DATA(LCDRD_MARK, PSB15_PSB14_FN1, PTH6_FN),
+	PINMUX_DATA(DV_CLKI_MARK, PSB15_PSB14_FN2, PTH6_FN),
+	PINMUX_DATA(LCDVSYN_MARK, PSB15_PSB14_FN1, PTH5_FN),
+	PINMUX_DATA(DV_CLK_MARK, PSB15_PSB14_FN2, PTH5_FN),
+	PINMUX_DATA(LCDDISP_MARK, PSB13_PSB12_LCDC_RGB, PTH4_FN),
+	PINMUX_DATA(LCDRS_MARK, PSB13_PSB12_LCDC_SYS, PTH4_FN),
+	PINMUX_DATA(LCDHSYN_MARK, PSB13_PSB12_LCDC_RGB, PTH3_FN),
+	PINMUX_DATA(LCDCS_MARK, PSB13_PSB12_LCDC_SYS, PTH3_FN),
+	PINMUX_DATA(LCDDON_MARK, PTH2_FN),
+	PINMUX_DATA(LCDDCK_MARK, PSB13_PSB12_LCDC_RGB, PTH1_FN),
+	PINMUX_DATA(LCDWR_MARK, PSB13_PSB12_LCDC_SYS, PTH1_FN),
+	PINMUX_DATA(LCDVEPWC_MARK, PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(STATUS0_MARK, PTJ7_FN),
+	PINMUX_DATA(PDSTATUS_MARK, PTJ5_FN),
+	PINMUX_DATA(A25_MARK, PTJ3_FN),
+	PINMUX_DATA(A24_MARK, PTJ2_FN),
+	PINMUX_DATA(A23_MARK, PTJ1_FN),
+	PINMUX_DATA(A22_MARK, PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(SIUAFCK_MARK, PTK7_FN),
+	PINMUX_DATA(SIUAILR_MARK, PSB9_PSB8_FN1, PTK6_FN),
+	PINMUX_DATA(MSIOF1_SS2_MARK, PSB9_PSB8_FN2, PTK6_FN),
+	PINMUX_DATA(MSIOF1_RSYNC_MARK, PSB9_PSB8_FN3, PTK6_FN),
+	PINMUX_DATA(SIUAIBT_MARK, PSB9_PSB8_FN1, PTK5_FN),
+	PINMUX_DATA(MSIOF1_SS1_MARK, PSB9_PSB8_FN2, PTK5_FN),
+	PINMUX_DATA(MSIOF1_RSCK_MARK, PSB9_PSB8_FN3, PTK5_FN),
+	PINMUX_DATA(SIUAISLD_MARK, PSB7_PSB6_FN1, PTK4_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK, PSB7_PSB6_FN2, PTK4_FN),
+	PINMUX_DATA(SIUAOLR_MARK, PSB7_PSB6_FN1, PTK3_FN),
+	PINMUX_DATA(MSIOF1_TSYNC_MARK, PSB7_PSB6_FN2, PTK3_FN),
+	PINMUX_DATA(SIUAOBT_MARK, PSB7_PSB6_FN1, PTK2_FN),
+	PINMUX_DATA(MSIOF1_TSCK_MARK, PSB7_PSB6_FN2, PTK2_FN),
+	PINMUX_DATA(SIUAOSLD_MARK, PSB7_PSB6_FN1, PTK1_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK, PSB7_PSB6_FN2, PTK1_FN),
+	PINMUX_DATA(SIUAMCK_MARK, PSB7_PSB6_FN1, PTK0_FN),
+	PINMUX_DATA(MSIOF1_MCK_MARK, PSB7_PSB6_FN2, PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(LCDD15_MARK, PSB5_PSB4_FN1, PTL7_FN),
+	PINMUX_DATA(DV_D15_MARK, PSB5_PSB4_FN2, PTL7_FN),
+	PINMUX_DATA(LCDD14_MARK, PSB5_PSB4_FN1, PTL6_FN),
+	PINMUX_DATA(DV_D14_MARK, PSB5_PSB4_FN2, PTL6_FN),
+	PINMUX_DATA(LCDD13_MARK, PSB5_PSB4_FN1, PTL5_FN),
+	PINMUX_DATA(DV_D13_MARK, PSB5_PSB4_FN2, PTL5_FN),
+	PINMUX_DATA(LCDD12_MARK, PSB5_PSB4_FN1, PTL4_FN),
+	PINMUX_DATA(DV_D12_MARK, PSB5_PSB4_FN2, PTL4_FN),
+	PINMUX_DATA(LCDD11_MARK, PSB5_PSB4_FN1, PTL3_FN),
+	PINMUX_DATA(DV_D11_MARK, PSB5_PSB4_FN2, PTL3_FN),
+	PINMUX_DATA(LCDD10_MARK, PSB5_PSB4_FN1, PTL2_FN),
+	PINMUX_DATA(DV_D10_MARK, PSB5_PSB4_FN2, PTL2_FN),
+	PINMUX_DATA(LCDD9_MARK, PSB5_PSB4_FN1, PTL1_FN),
+	PINMUX_DATA(DV_D9_MARK, PSB5_PSB4_FN2, PTL1_FN),
+	PINMUX_DATA(LCDD8_MARK, PSB5_PSB4_FN1, PTL0_FN),
+	PINMUX_DATA(DV_D8_MARK, PSB5_PSB4_FN2, PTL0_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(LCDD7_MARK, PSB5_PSB4_FN1, PTM7_FN),
+	PINMUX_DATA(DV_D7_MARK, PSB5_PSB4_FN2, PTM7_FN),
+	PINMUX_DATA(LCDD6_MARK, PSB5_PSB4_FN1, PTM6_FN),
+	PINMUX_DATA(DV_D6_MARK, PSB5_PSB4_FN2, PTM6_FN),
+	PINMUX_DATA(LCDD5_MARK, PSB5_PSB4_FN1, PTM5_FN),
+	PINMUX_DATA(DV_D5_MARK, PSB5_PSB4_FN2, PTM5_FN),
+	PINMUX_DATA(LCDD4_MARK, PSB5_PSB4_FN1, PTM4_FN),
+	PINMUX_DATA(DV_D4_MARK, PSB5_PSB4_FN2, PTM4_FN),
+	PINMUX_DATA(LCDD3_MARK, PSB5_PSB4_FN1, PTM3_FN),
+	PINMUX_DATA(DV_D3_MARK, PSB5_PSB4_FN2, PTM3_FN),
+	PINMUX_DATA(LCDD2_MARK, PSB5_PSB4_FN1, PTM2_FN),
+	PINMUX_DATA(DV_D2_MARK, PSB5_PSB4_FN2, PTM2_FN),
+	PINMUX_DATA(LCDD1_MARK, PSB5_PSB4_FN1, PTM1_FN),
+	PINMUX_DATA(DV_D1_MARK, PSB5_PSB4_FN2, PTM1_FN),
+	PINMUX_DATA(LCDD0_MARK, PSB5_PSB4_FN1, PTM0_FN),
+	PINMUX_DATA(DV_D0_MARK, PSB5_PSB4_FN2, PTM0_FN),
+
+	/* PTN FN */
+	PINMUX_DATA(LCDD23_MARK, PSB3_PSB2_FN1, PTN7_FN),
+	PINMUX_DATA(SCIF5_PTN_SCK_MARK, PSB3_PSB2_FN2, PTN7_FN),
+	PINMUX_DATA(LCDD22_MARK, PSB3_PSB2_FN1, PTN6_FN),
+	PINMUX_DATA(SCIF5_PTN_RXD_MARK, PSB3_PSB2_FN2, PTN6_FN),
+	PINMUX_DATA(LCDD21_MARK, PSB3_PSB2_FN1, PTN5_FN),
+	PINMUX_DATA(SCIF5_PTN_TXD_MARK, PSB3_PSB2_FN2, PTN5_FN),
+	PINMUX_DATA(LCDD20_MARK, PSB3_PSB2_FN1, PTN4_FN),
+	PINMUX_DATA(SCIF4_PTN_SCK_MARK, PSB3_PSB2_FN2, PTN4_FN),
+	PINMUX_DATA(LCDD19_MARK, PSB3_PSB2_FN1, PTN3_FN),
+	PINMUX_DATA(SCIF4_PTN_RXD_MARK, PSB3_PSB2_FN2, PTN3_FN),
+	PINMUX_DATA(LCDD18_MARK, PSB3_PSB2_FN1, PTN2_FN),
+	PINMUX_DATA(SCIF4_PTN_TXD_MARK, PSB3_PSB2_FN2, PTN2_FN),
+	PINMUX_DATA(LCDD17_MARK, PSB5_PSB4_FN1, PTN1_FN),
+	PINMUX_DATA(DV_VSYNC_MARK, PSB5_PSB4_FN2, PTN1_FN),
+	PINMUX_DATA(LCDD16_MARK, PSB5_PSB4_FN1, PTN0_FN),
+	PINMUX_DATA(DV_HSYNC_MARK, PSB5_PSB4_FN2, PTN0_FN),
+
+	/* PTQ FN */
+	PINMUX_DATA(AN3_MARK, PTQ3_FN),
+	PINMUX_DATA(AN2_MARK, PTQ2_FN),
+	PINMUX_DATA(AN1_MARK, PTQ1_FN),
+	PINMUX_DATA(AN0_MARK, PTQ0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(CS6B_CE1B_MARK, PTR7_FN),
+	PINMUX_DATA(CS6A_CE2B_MARK, PTR6_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK, PTR5_FN),
+	PINMUX_DATA(CS5A_CE2A_MARK, PTR4_FN),
+	PINMUX_DATA(IOIS16_MARK, PSA13_PSA12_FN1, PTR3_FN),
+	PINMUX_DATA(LCDLCLK_PTR_MARK, PSA13_PSA12_FN2, PTR3_FN),
+	PINMUX_DATA(WAIT_MARK, PTR2_FN),
+	PINMUX_DATA(WE3_ICIOWR_MARK, PTR1_FN),
+	PINMUX_DATA(WE2_ICIORD_MARK, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(SCIF1_PTS_SCK_MARK, PSC15_PSC14_FN1, PTS7_FN),
+	PINMUX_DATA(SDHI0CD_PTS_MARK, PSC15_PSC14_FN2, PTS7_FN),
+	PINMUX_DATA(SCIF1_PTS_RXD_MARK, PSC15_PSC14_FN1, PTS6_FN),
+	PINMUX_DATA(SDHI0WP_PTS_MARK, PSC15_PSC14_FN2, PTS6_FN),
+	PINMUX_DATA(SCIF1_PTS_TXD_MARK, PSC15_PSC14_FN1, PTS5_FN),
+	PINMUX_DATA(SDHI0D3_PTS_MARK, PSC15_PSC14_FN2, PTS5_FN),
+	PINMUX_DATA(SCIF3_PTS_CTS_MARK, PSC15_PSC14_FN1, PTS4_FN),
+	PINMUX_DATA(SDHI0D2_PTS_MARK, PSC15_PSC14_FN2, PTS4_FN),
+	PINMUX_DATA(SCIF3_PTS_RTS_MARK, PSC15_PSC14_FN1, PTS3_FN),
+	PINMUX_DATA(SDHI0D1_PTS_MARK, PSC15_PSC14_FN2, PTS3_FN),
+	PINMUX_DATA(SCIF3_PTS_SCK_MARK, PSC15_PSC14_FN1, PTS2_FN),
+	PINMUX_DATA(SDHI0D0_PTS_MARK, PSC15_PSC14_FN2, PTS2_FN),
+	PINMUX_DATA(SCIF3_PTS_RXD_MARK, PSC15_PSC14_FN1, PTS1_FN),
+	PINMUX_DATA(SDHI0CMD_PTS_MARK, PSC15_PSC14_FN2, PTS1_FN),
+	PINMUX_DATA(SCIF3_PTS_TXD_MARK, PSC15_PSC14_FN1, PTS0_FN),
+	PINMUX_DATA(SDHI0CLK_PTS_MARK, PSC15_PSC14_FN2, PTS0_FN),
+
+	/* PTT FN */
+	PINMUX_DATA(SCIF0_PTT_SCK_MARK, PSC13_PSC12_FN1, PTT5_FN),
+	PINMUX_DATA(MSIOF0_PTT_TSCK_MARK, PSC13_PSC12_FN2, PTT5_FN),
+	PINMUX_DATA(SCIF0_PTT_RXD_MARK, PSC13_PSC12_FN1, PTT4_FN),
+	PINMUX_DATA(MSIOF0_PTT_RXD_MARK, PSC13_PSC12_FN2, PTT4_FN),
+	PINMUX_DATA(SCIF0_PTT_TXD_MARK, PSC13_PSC12_FN1, PTT3_FN),
+	PINMUX_DATA(MSIOF0_PTT_TXD_MARK, PSC13_PSC12_FN2, PTT3_FN),
+	PINMUX_DATA(SCIF2_PTT_SCK_MARK, PSC11_PSC10_FN1, PTT2_FN),
+	PINMUX_DATA(MSIOF0_PTT_TSYNC_MARK, PSC11_PSC10_FN2, PTT2_FN),
+	PINMUX_DATA(SCIF2_PTT_RXD_MARK, PSC11_PSC10_FN1, PTT1_FN),
+	PINMUX_DATA(MSIOF0_PTT_SS1_MARK, PSC11_PSC10_FN2, PTT1_FN),
+	PINMUX_DATA(MSIOF0_PTT_RSCK_MARK, PSC11_PSC10_FN3, PTT1_FN),
+	PINMUX_DATA(SCIF2_PTT_TXD_MARK, PSC11_PSC10_FN1, PTT0_FN),
+	PINMUX_DATA(MSIOF0_PTT_SS2_MARK, PSC11_PSC10_FN2, PTT0_FN),
+	PINMUX_DATA(MSIOF0_PTT_RSYNC_MARK, PSC11_PSC10_FN3, PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(FCDE_MARK, PSC9_PSC8_FN1, PTU5_FN),
+	PINMUX_DATA(SCIF0_PTU_SCK_MARK, PSC9_PSC8_FN2, PTU5_FN),
+	PINMUX_DATA(FSC_MARK, PSC9_PSC8_FN1, PTU4_FN),
+	PINMUX_DATA(SCIF0_PTU_RXD_MARK, PSC9_PSC8_FN2, PTU4_FN),
+	PINMUX_DATA(FWE_MARK, PSC9_PSC8_FN1, PTU3_FN),
+	PINMUX_DATA(SCIF0_PTU_TXD_MARK, PSC9_PSC8_FN2, PTU3_FN),
+	PINMUX_DATA(FOE_MARK, PSC7_PSC6_FN1, PTU2_FN),
+	PINMUX_DATA(SCIF2_PTU_SCK_MARK, PSC7_PSC6_FN2, PTU2_FN),
+	PINMUX_DATA(VIO_VD2_MARK, PSC7_PSC6_FN3, PTU2_FN),
+	PINMUX_DATA(FRB_MARK, PSC7_PSC6_FN1, PTU1_FN),
+	PINMUX_DATA(SCIF2_PTU_RXD_MARK, PSC7_PSC6_FN2, PTU1_FN),
+	PINMUX_DATA(VIO_CLK2_MARK, PSC7_PSC6_FN3, PTU1_FN),
+	PINMUX_DATA(FCE_MARK, PSC7_PSC6_FN1, PTU0_FN),
+	PINMUX_DATA(SCIF2_PTU_TXD_MARK, PSC7_PSC6_FN2, PTU0_FN),
+	PINMUX_DATA(VIO_HD2_MARK, PSC7_PSC6_FN3, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(NAF7_MARK, PSC7_PSC6_FN1, PTV7_FN),
+	PINMUX_DATA(SCIF1_PTV_SCK_MARK, PSC7_PSC6_FN2, PTV7_FN),
+	PINMUX_DATA(VIO_D15_MARK, PSC7_PSC6_FN3, PTV7_FN),
+	PINMUX_DATA(NAF6_MARK, PSC7_PSC6_FN1, PTV6_FN),
+	PINMUX_DATA(SCIF1_PTV_RXD_MARK, PSC7_PSC6_FN2, PTV6_FN),
+	PINMUX_DATA(VIO_D14_MARK, PSC7_PSC6_FN3, PTV6_FN),
+	PINMUX_DATA(NAF5_MARK, PSC7_PSC6_FN1, PTV5_FN),
+	PINMUX_DATA(SCIF1_PTV_TXD_MARK, PSC7_PSC6_FN2, PTV5_FN),
+	PINMUX_DATA(VIO_D13_MARK, PSC7_PSC6_FN3, PTV5_FN),
+	PINMUX_DATA(NAF4_MARK, PSC7_PSC6_FN1, PTV4_FN),
+	PINMUX_DATA(SCIF3_PTV_CTS_MARK, PSC7_PSC6_FN2, PTV4_FN),
+	PINMUX_DATA(VIO_D12_MARK, PSC7_PSC6_FN3, PTV4_FN),
+	PINMUX_DATA(NAF3_MARK, PSC7_PSC6_FN1, PTV3_FN),
+	PINMUX_DATA(SCIF3_PTV_RTS_MARK, PSC7_PSC6_FN2, PTV3_FN),
+	PINMUX_DATA(VIO_D11_MARK, PSC7_PSC6_FN3, PTV3_FN),
+	PINMUX_DATA(NAF2_MARK, PSC7_PSC6_FN1, PTV2_FN),
+	PINMUX_DATA(SCIF3_PTV_SCK_MARK, PSC7_PSC6_FN2, PTV2_FN),
+	PINMUX_DATA(VIO_D10_MARK, PSC7_PSC6_FN3, PTV2_FN),
+	PINMUX_DATA(NAF1_MARK, PSC7_PSC6_FN1, PTV1_FN),
+	PINMUX_DATA(SCIF3_PTV_RXD_MARK, PSC7_PSC6_FN2, PTV1_FN),
+	PINMUX_DATA(VIO_D9_MARK, PSC7_PSC6_FN3, PTV1_FN),
+	PINMUX_DATA(NAF0_MARK, PSC7_PSC6_FN1, PTV0_FN),
+	PINMUX_DATA(SCIF3_PTV_TXD_MARK, PSC7_PSC6_FN2, PTV0_FN),
+	PINMUX_DATA(VIO_D8_MARK, PSC7_PSC6_FN3, PTV0_FN),
+
+	/* PTW FN */
+	PINMUX_DATA(IRQ7_MARK, PTW7_FN),
+	PINMUX_DATA(IRQ6_MARK, PTW6_FN),
+	PINMUX_DATA(IRQ5_MARK, PTW5_FN),
+	PINMUX_DATA(IRQ4_MARK, PSD15_PSD14_FN1, PTW4_FN),
+	PINMUX_DATA(LCDLCLK_PTW_MARK, PSD15_PSD14_FN2, PTW4_FN),
+	PINMUX_DATA(IRQ3_MARK, PSD13_PSD12_FN1, PTW3_FN),
+	PINMUX_DATA(ADTRG_MARK, PSD13_PSD12_FN2, PTW3_FN),
+	PINMUX_DATA(IRQ2_MARK, PSD11_PSD10_FN1, PTW2_FN),
+	PINMUX_DATA(BS_MARK, PSD11_PSD10_FN2, PTW2_FN),
+	PINMUX_DATA(VIO_CKO_MARK, PSD11_PSD10_FN3, PTW2_FN),
+	PINMUX_DATA(IRQ1_MARK, PSD9_PSD8_FN1, PTW1_FN),
+	PINMUX_DATA(SIUAISPD_MARK, PSD9_PSD8_FN2, PTW1_FN),
+	PINMUX_DATA(IRQ0_MARK, PSD7_PSD6_FN1, PTW0_FN),
+	PINMUX_DATA(SIUAOSPD_MARK, PSD7_PSD6_FN2, PTW0_FN),
+
+	/* PTX FN */
+	PINMUX_DATA(DACK1_MARK, PTX7_FN),
+	PINMUX_DATA(DREQ1_MARK, PSD3_PSD2_FN1, PTX6_FN),
+	PINMUX_DATA(MSIOF0_PTX_MCK_MARK, PSD3_PSD2_FN2, PTX6_FN),
+	PINMUX_DATA(DACK1_MARK, PTX5_FN),
+	PINMUX_DATA(IRDA_OUT_MARK, PSD5_PSD4_FN2, PTX5_FN),
+	PINMUX_DATA(DREQ1_MARK, PTX4_FN),
+	PINMUX_DATA(IRDA_IN_MARK, PSD5_PSD4_FN2, PTX4_FN),
+	PINMUX_DATA(TS0_SDAT_MARK, PTX3_FN),
+	PINMUX_DATA(TS0_SCK_MARK, PTX2_FN),
+	PINMUX_DATA(TS0_SDEN_MARK, PTX1_FN),
+	PINMUX_DATA(TS0_SPSYNC_MARK, PTX0_FN),
+
+	/* PTY FN */
+	PINMUX_DATA(VIO_D7_MARK, PTY7_FN),
+	PINMUX_DATA(VIO_D6_MARK, PTY6_FN),
+	PINMUX_DATA(VIO_D5_MARK, PTY5_FN),
+	PINMUX_DATA(VIO_D4_MARK, PTY4_FN),
+	PINMUX_DATA(VIO_D3_MARK, PTY3_FN),
+	PINMUX_DATA(VIO_D2_MARK, PTY2_FN),
+	PINMUX_DATA(VIO_D1_MARK, PTY1_FN),
+	PINMUX_DATA(VIO_D0_MARK, PTY0_FN),
+
+	/* PTZ FN */
+	PINMUX_DATA(SIUBOBT_MARK, PTZ7_FN),
+	PINMUX_DATA(SIUBOLR_MARK, PTZ6_FN),
+	PINMUX_DATA(SIUBOSLD_MARK, PTZ5_FN),
+	PINMUX_DATA(SIUBMCK_MARK, PTZ4_FN),
+	PINMUX_DATA(VIO_FLD_MARK, PSD1_PSD0_FN1, PTZ3_FN),
+	PINMUX_DATA(SIUBFCK_MARK, PSD1_PSD0_FN2, PTZ3_FN),
+	PINMUX_DATA(VIO_HD1_MARK, PSD1_PSD0_FN1, PTZ2_FN),
+	PINMUX_DATA(SIUBILR_MARK, PSD1_PSD0_FN2, PTZ2_FN),
+	PINMUX_DATA(VIO_VD1_MARK, PSD1_PSD0_FN1, PTZ1_FN),
+	PINMUX_DATA(SIUBIBT_MARK, PSD1_PSD0_FN2, PTZ1_FN),
+	PINMUX_DATA(VIO_CLK1_MARK, PSD1_PSD0_FN1, PTZ0_FN),
+	PINMUX_DATA(SIUBISLD_MARK, PSD1_PSD0_FN2, PTZ0_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF7, PTF7_DATA),
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK7, PTK7_DATA),
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ */
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS7, PTS7_DATA),
+	PINMUX_GPIO(GPIO_PTS6, PTS6_DATA),
+	PINMUX_GPIO(GPIO_PTS5, PTS5_DATA),
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT5, PTT5_DATA),
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU5, PTU5_DATA),
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV7, PTV7_DATA),
+	PINMUX_GPIO(GPIO_PTV6, PTV6_DATA),
+	PINMUX_GPIO(GPIO_PTV5, PTV5_DATA),
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW */
+	PINMUX_GPIO(GPIO_PTW7, PTW7_DATA),
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX */
+	PINMUX_GPIO(GPIO_PTX7, PTX7_DATA),
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY */
+	PINMUX_GPIO(GPIO_PTY7, PTY7_DATA),
+	PINMUX_GPIO(GPIO_PTY6, PTY6_DATA),
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ */
+	PINMUX_GPIO(GPIO_PTZ7, PTZ7_DATA),
+	PINMUX_GPIO(GPIO_PTZ6, PTZ6_DATA),
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+	PINMUX_GPIO(GPIO_PTZ0, PTZ0_DATA),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_TXD, SCIF0_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_RXD, SCIF0_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTT_SCK, SCIF0_PTT_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_TXD, SCIF0_PTU_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_RXD, SCIF0_PTU_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_PTU_SCK, SCIF0_PTU_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_TXD, SCIF1_PTS_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_RXD, SCIF1_PTS_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTS_SCK, SCIF1_PTS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_TXD, SCIF1_PTV_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_RXD, SCIF1_PTV_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_PTV_SCK, SCIF1_PTV_SCK_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_TXD, SCIF2_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_RXD, SCIF2_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTT_SCK, SCIF2_PTT_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_TXD, SCIF2_PTU_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_RXD, SCIF2_PTU_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_PTU_SCK, SCIF2_PTU_SCK_MARK),
+
+	/* SCIF3 */
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_TXD, SCIF3_PTS_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_RXD, SCIF3_PTS_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_SCK, SCIF3_PTS_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_RTS, SCIF3_PTS_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTS_CTS, SCIF3_PTS_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_TXD, SCIF3_PTV_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_RXD, SCIF3_PTV_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_SCK, SCIF3_PTV_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_RTS, SCIF3_PTV_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_PTV_CTS, SCIF3_PTV_CTS_MARK),
+
+	/* SCIF4 */
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_TXD, SCIF4_PTE_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_RXD, SCIF4_PTE_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTE_SCK, SCIF4_PTE_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_TXD, SCIF4_PTN_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_RXD, SCIF4_PTN_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_PTN_SCK, SCIF4_PTN_SCK_MARK),
+
+	/* SCIF5 */
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_TXD, SCIF5_PTE_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_RXD, SCIF5_PTE_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTE_SCK, SCIF5_PTE_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_TXD, SCIF5_PTN_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_RXD, SCIF5_PTN_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_PTN_SCK, SCIF5_PTN_SCK_MARK),
+
+	/* CEU */
+	PINMUX_GPIO(GPIO_FN_VIO_D15, VIO_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D14, VIO_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D13, VIO_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D12, VIO_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D11, VIO_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D10, VIO_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D9, VIO_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D8, VIO_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D7, VIO_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D6, VIO_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D5, VIO_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D4, VIO_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D3, VIO_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D2, VIO_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D1, VIO_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_D0, VIO_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK1, VIO_CLK1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD1, VIO_VD1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD1, VIO_HD1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_FLD, VIO_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CKO, VIO_CKO_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_VD2, VIO_VD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_HD2, VIO_HD2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO_CLK2, VIO_CLK2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23, LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22, LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21, LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20, LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19, LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18, LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17, LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16, LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15, LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14, LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13, LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12, LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11, LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10, LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9, LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8, LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7, LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6, LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5, LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4, LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3, LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2, LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1, LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0, LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK_PTR, LCDLCLK_PTR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK_PTW, LCDLCLK_PTW_MARK),
+	/* Main LCD */
+	PINMUX_GPIO(GPIO_FN_LCDDON, LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC, LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC, LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN, LCDVSYN_MARK),
+	/* Main LCD - RGB Mode */
+	PINMUX_GPIO(GPIO_FN_LCDDCK, LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN, LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP, LCDDISP_MARK),
+	/* Main LCD - SYS Mode */
+	PINMUX_GPIO(GPIO_FN_LCDRS, LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS, LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR, LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD, LCDRD_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_IRQ0, IRQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ1, IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ2, IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ3, IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ4, IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ5, IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ6, IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_IRQ7, IRQ7_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDCK, AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK),
+
+	/* SDHI0 (PTD) */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD_PTD, SDHI0CD_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP_PTD, SDHI0WP_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3_PTD, SDHI0D3_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2_PTD, SDHI0D2_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1_PTD, SDHI0D1_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0_PTD, SDHI0D0_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD_PTD, SDHI0CMD_PTD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK_PTD, SDHI0CLK_PTD_MARK),
+
+	/* SDHI0 (PTS) */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD_PTS, SDHI0CD_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP_PTS, SDHI0WP_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3_PTS, SDHI0D3_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2_PTS, SDHI0D2_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1_PTS, SDHI0D1_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0_PTS, SDHI0D0_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD_PTS, SDHI0CMD_PTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK_PTS, SDHI0CLK_PTS_MARK),
+
+	/* SDHI1 */
+	PINMUX_GPIO(GPIO_FN_SDHI1CD, SDHI1CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1WP, SDHI1WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D3, SDHI1D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D2, SDHI1D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D1, SDHI1D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D0, SDHI1D0_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CMD, SDHI1CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CLK, SDHI1CLK_MARK),
+
+	/* SIUA */
+	PINMUX_GPIO(GPIO_FN_SIUAFCK, SIUAFCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAILR, SIUAILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAIBT, SIUAIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISLD, SIUAISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOLR, SIUAOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOBT, SIUAOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAOSLD, SIUAOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAMCK, SIUAMCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUAISPD, SIUAISPD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUOSPD, SIUAOSPD_MARK),
+
+	/* SIUB */
+	PINMUX_GPIO(GPIO_FN_SIUBFCK, SIUBFCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBILR, SIUBILR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBIBT, SIUBIBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBISLD, SIUBISLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOLR, SIUBOLR_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOBT, SIUBOBT_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBOSLD, SIUBOSLD_MARK),
+	PINMUX_GPIO(GPIO_FN_SIUBMCK, SIUBMCK_MARK),
+
+	/* IRDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_IN, IRDA_IN_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT, IRDA_OUT_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_CLKI, DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK, DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC, DV_HSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC, DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D15, DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14, DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13, DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12, DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11, DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10, DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9, DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8, DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7, DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6, DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5, DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4, DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3, DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2, DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1, DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0, DV_D0_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYIN0, KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1, KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2, KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3, KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4, KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0, KEYOUT0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1, KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2, KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3, KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6, KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5, KEYOUT5_IN5_MARK),
+
+	/* MSIOF0 (PTF) */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TXD, MSIOF0_PTF_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RXD, MSIOF0_PTF_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_MCK, MSIOF0_PTF_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TSYNC, MSIOF0_PTF_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_TSCK, MSIOF0_PTF_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RSYNC, MSIOF0_PTF_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_RSCK, MSIOF0_PTF_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_SS1, MSIOF0_PTF_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTF_SS2, MSIOF0_PTF_SS2_MARK),
+
+	/* MSIOF0 (PTT+PTX) */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TXD, MSIOF0_PTT_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RXD, MSIOF0_PTT_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTX_MCK, MSIOF0_PTX_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TSYNC, MSIOF0_PTT_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_TSCK, MSIOF0_PTT_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RSYNC, MSIOF0_PTT_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_RSCK, MSIOF0_PTT_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_SS1, MSIOF0_PTT_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_PTT_SS2, MSIOF0_PTT_SS2_MARK),
+
+	/* MSIOF1 */
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TXD, MSIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RXD, MSIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_MCK, MSIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSYNC, MSIOF1_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSCK, MSIOF1_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSYNC, MSIOF1_RSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSCK, MSIOF1_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS1, MSIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS2, MSIOF1_SS2_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TS0_SDAT, TS0_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SCK, TS0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SDEN, TS0_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TS0_SPSYNC, TS0_SPSYNC_MARK),
+
+	/* FLCTL */
+	PINMUX_GPIO(GPIO_FN_FCE, FCE_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF7, NAF7_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF6, NAF6_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF5, NAF5_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF4, NAF4_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF3, NAF3_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF2, NAF2_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF1, NAF1_MARK),
+	PINMUX_GPIO(GPIO_FN_NAF0, NAF0_MARK),
+	PINMUX_GPIO(GPIO_FN_FCDE, FCDE_MARK),
+	PINMUX_GPIO(GPIO_FN_FOE, FOE_MARK),
+	PINMUX_GPIO(GPIO_FN_FSC, FSC_MARK),
+	PINMUX_GPIO(GPIO_FN_FWE, FWE_MARK),
+	PINMUX_GPIO(GPIO_FN_FRB, FRB_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DACK1, DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ1, DREQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_DACK0, DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DREQ0, DREQ0_MARK),
+
+	/* ADC */
+	PINMUX_GPIO(GPIO_FN_AN3, AN3_MARK),
+	PINMUX_GPIO(GPIO_FN_AN2, AN2_MARK),
+	PINMUX_GPIO(GPIO_FN_AN1, AN1_MARK),
+	PINMUX_GPIO(GPIO_FN_AN0, AN0_MARK),
+	PINMUX_GPIO(GPIO_FN_ADTRG, ADTRG_MARK),
+
+	/* CPG */
+	PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK),
+	PINMUX_GPIO(GPIO_FN_PDSTATUS, PDSTATUS_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO0, TPUTO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO1, TPUTO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO2, TPUTO2_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO3, TPUTO3_MARK),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31, D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30, D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29, D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28, D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27, D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26, D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25, D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24, D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23, D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22, D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21, D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20, D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19, D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18, D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17, D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16, D16_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_BS, BS_MARK),
+	PINMUX_GPIO(GPIO_FN_A25, A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24, A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23, A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22, A22_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B, CS6A_CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A, CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5A_CE2A, CS5A_CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_ICIOWR, WE3_ICIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_ICIORD, WE2_ICIORD_MARK),
+
+	/* ATAPI */
+	PINMUX_GPIO(GPIO_FN_IDED15, IDED15_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED14, IDED14_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED13, IDED13_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED12, IDED12_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED11, IDED11_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED10, IDED10_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED9, IDED9_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED8, IDED8_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED7, IDED7_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED6, IDED6_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED5, IDED5_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED4, IDED4_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED3, IDED3_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED2, IDED2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED1, IDED1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED0, IDED0_MARK),
+	PINMUX_GPIO(GPIO_FN_DIRECTION, DIRECTION_MARK),
+	PINMUX_GPIO(GPIO_FN_EXBUF_ENB, EXBUF_ENB_MARK),
+	PINMUX_GPIO(GPIO_FN_IDERST, IDERST_MARK),
+	PINMUX_GPIO(GPIO_FN_IODACK, IODACK_MARK),
+	PINMUX_GPIO(GPIO_FN_IODREQ, IODREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORDY, IDEIORDY_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEINT, IDEINT_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIOWR, IDEIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORD, IDEIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS1, IDECS1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS0, IDECS0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA2, IDEA2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA1, IDEA1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA0, IDEA0_MARK),
+ };
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, 0, PTA7_IN,
+		PTA6_FN, PTA6_OUT, 0, PTA6_IN,
+		PTA5_FN, PTA5_OUT, 0, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, 0, PTB7_IN,
+		PTB6_FN, PTB6_OUT, 0, PTB6_IN,
+		PTB5_FN, PTB5_OUT, 0, PTB5_IN,
+		PTB4_FN, PTB4_OUT, 0, PTB4_IN,
+		PTB3_FN, PTB3_OUT, 0, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, 0, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, 0, PTC7_IN,
+		PTC6_FN, PTC6_OUT, 0, PTC6_IN,
+		PTC5_FN, PTC5_OUT, 0, PTC5_IN,
+		PTC4_FN, PTC4_OUT, 0, PTC4_IN,
+		PTC3_FN, PTC3_OUT, 0, PTC3_IN,
+		PTC2_FN, PTC2_OUT, 0, PTC2_IN,
+		PTC1_FN, PTC1_OUT, 0, PTC1_IN,
+		PTC0_FN, PTC0_OUT, 0, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, 0, PTD7_IN,
+		PTD6_FN, PTD6_OUT, 0, PTD6_IN,
+		PTD5_FN, PTD5_OUT, 0, PTD5_IN,
+		PTD4_FN, PTD4_OUT, 0, PTD4_IN,
+		PTD3_FN, PTD3_OUT, 0, PTD3_IN,
+		PTD2_FN, PTD2_OUT, 0, PTD2_IN,
+		PTD1_FN, PTD1_OUT, 0, PTD1_IN,
+		PTD0_FN, PTD0_OUT, 0, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTE5_FN, PTE5_OUT, 0, PTE5_IN,
+		PTE4_FN, PTE4_OUT, 0, PTE4_IN,
+		PTE3_FN, PTE3_OUT, 0, PTE3_IN,
+		PTE2_FN, PTE2_OUT, 0, PTE2_IN,
+		PTE1_FN, PTE1_OUT, 0, PTE1_IN,
+		PTE0_FN, PTE0_OUT, 0, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		PTF7_FN, PTF7_OUT, 0, PTF7_IN,
+		PTF6_FN, PTF6_OUT, 0, PTF6_IN,
+		PTF5_FN, PTF5_OUT, 0, PTF5_IN,
+		PTF4_FN, PTF4_OUT, 0, PTF4_IN,
+		PTF3_FN, PTF3_OUT, 0, PTF3_IN,
+		PTF2_FN, PTF2_OUT, 0, PTF2_IN,
+		PTF1_FN, PTF1_OUT, 0, PTF1_IN,
+		PTF0_FN, PTF0_OUT, 0, PTF0_IN }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTG5_FN, PTG5_OUT, 0, 0,
+		PTG4_FN, PTG4_OUT, 0, 0,
+		PTG3_FN, PTG3_OUT, 0, 0,
+		PTG2_FN, PTG2_OUT, 0, 0,
+		PTG1_FN, PTG1_OUT, 0, 0,
+		PTG0_FN, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		PTH7_FN, PTH7_OUT, 0, PTH7_IN,
+		PTH6_FN, PTH6_OUT, 0, PTH6_IN,
+		PTH5_FN, PTH5_OUT, 0, PTH5_IN,
+		PTH4_FN, PTH4_OUT, 0, PTH4_IN,
+		PTH3_FN, PTH3_OUT, 0, PTH3_IN,
+		PTH2_FN, PTH2_OUT, 0, PTH2_IN,
+		PTH1_FN, PTH1_OUT, 0, PTH1_IN,
+		PTH0_FN, PTH0_OUT, 0, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		PTJ7_FN, PTJ7_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ5_FN, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ3_FN, PTJ3_OUT, 0, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, 0, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, 0, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, 0, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		PTK7_FN, PTK7_OUT, 0, PTK7_IN,
+		PTK6_FN, PTK6_OUT, 0, PTK6_IN,
+		PTK5_FN, PTK5_OUT, 0, PTK5_IN,
+		PTK4_FN, PTK4_OUT, 0, PTK4_IN,
+		PTK3_FN, PTK3_OUT, 0, PTK3_IN,
+		PTK2_FN, PTK2_OUT, 0, PTK2_IN,
+		PTK1_FN, PTK1_OUT, 0, PTK1_IN,
+		PTK0_FN, PTK0_OUT, 0, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, 0, PTL7_IN,
+		PTL6_FN, PTL6_OUT, 0, PTL6_IN,
+		PTL5_FN, PTL5_OUT, 0, PTL5_IN,
+		PTL4_FN, PTL4_OUT, 0, PTL4_IN,
+		PTL3_FN, PTL3_OUT, 0, PTL3_IN,
+		PTL2_FN, PTL2_OUT, 0, PTL2_IN,
+		PTL1_FN, PTL1_OUT, 0, PTL1_IN,
+		PTL0_FN, PTL0_OUT, 0, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, 0, PTM7_IN,
+		PTM6_FN, PTM6_OUT, 0, PTM6_IN,
+		PTM5_FN, PTM5_OUT, 0, PTM5_IN,
+		PTM4_FN, PTM4_OUT, 0, PTM4_IN,
+		PTM3_FN, PTM3_OUT, 0, PTM3_IN,
+		PTM2_FN, PTM2_OUT, 0, PTM2_IN,
+		PTM1_FN, PTM1_OUT, 0, PTM1_IN,
+		PTM0_FN, PTM0_OUT, 0, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		PTN7_FN, PTN7_OUT, 0, PTN7_IN,
+		PTN6_FN, PTN6_OUT, 0, PTN6_IN,
+		PTN5_FN, PTN5_OUT, 0, PTN5_IN,
+		PTN4_FN, PTN4_OUT, 0, PTN4_IN,
+		PTN3_FN, PTN3_OUT, 0, PTN3_IN,
+		PTN2_FN, PTN2_OUT, 0, PTN2_IN,
+		PTN1_FN, PTN1_OUT, 0, PTN1_IN,
+		PTN0_FN, PTN0_OUT, 0, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTQ3_FN, 0, 0, PTQ3_IN,
+		PTQ2_FN, 0, 0, PTQ2_IN,
+		PTQ1_FN, 0, 0, PTQ1_IN,
+		PTQ0_FN, 0, 0, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		PTR7_FN, PTR7_OUT, 0, PTR7_IN,
+		PTR6_FN, PTR6_OUT, 0, PTR6_IN,
+		PTR5_FN, PTR5_OUT, 0, PTR5_IN,
+		PTR4_FN, PTR4_OUT, 0, PTR4_IN,
+		PTR3_FN, 0, 0, PTR3_IN,
+		PTR2_FN, 0, PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, 0, PTR1_IN,
+		PTR0_FN, PTR0_OUT, 0, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		PTS7_FN, PTS7_OUT, 0, PTS7_IN,
+		PTS6_FN, PTS6_OUT, 0, PTS6_IN,
+		PTS5_FN, PTS5_OUT, 0, PTS5_IN,
+		PTS4_FN, PTS4_OUT, 0, PTS4_IN,
+		PTS3_FN, PTS3_OUT, 0, PTS3_IN,
+		PTS2_FN, PTS2_OUT, 0, PTS2_IN,
+		PTS1_FN, PTS1_OUT, 0, PTS1_IN,
+		PTS0_FN, PTS0_OUT, 0, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTT5_FN, PTT5_OUT, 0, PTT5_IN,
+		PTT4_FN, PTT4_OUT, 0, PTT4_IN,
+		PTT3_FN, PTT3_OUT, 0, PTT3_IN,
+		PTT2_FN, PTT2_OUT, 0, PTT2_IN,
+		PTT1_FN, PTT1_OUT, 0, PTT1_IN,
+		PTT0_FN, PTT0_OUT, 0, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTU5_FN, PTU5_OUT, 0, PTU5_IN,
+		PTU4_FN, PTU4_OUT, 0, PTU4_IN,
+		PTU3_FN, PTU3_OUT, 0, PTU3_IN,
+		PTU2_FN, PTU2_OUT, 0, PTU2_IN,
+		PTU1_FN, PTU1_OUT, 0, PTU1_IN,
+		PTU0_FN, PTU0_OUT, 0, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		PTV7_FN, PTV7_OUT, 0, PTV7_IN,
+		PTV6_FN, PTV6_OUT, 0, PTV6_IN,
+		PTV5_FN, PTV5_OUT, 0, PTV5_IN,
+		PTV4_FN, PTV4_OUT, 0, PTV4_IN,
+		PTV3_FN, PTV3_OUT, 0, PTV3_IN,
+		PTV2_FN, PTV2_OUT, 0, PTV2_IN,
+		PTV1_FN, PTV1_OUT, 0, PTV1_IN,
+		PTV0_FN, PTV0_OUT, 0, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		PTW7_FN, PTW7_OUT, 0, PTW7_IN,
+		PTW6_FN, PTW6_OUT, 0, PTW6_IN,
+		PTW5_FN, PTW5_OUT, 0, PTW5_IN,
+		PTW4_FN, PTW4_OUT, 0, PTW4_IN,
+		PTW3_FN, PTW3_OUT, 0, PTW3_IN,
+		PTW2_FN, PTW2_OUT, 0, PTW2_IN,
+		PTW1_FN, PTW1_OUT, 0, PTW1_IN,
+		PTW0_FN, PTW0_OUT, 0, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		PTX7_FN, PTX7_OUT, 0, PTX7_IN,
+		PTX6_FN, PTX6_OUT, 0, PTX6_IN,
+		PTX5_FN, PTX5_OUT, 0, PTX5_IN,
+		PTX4_FN, PTX4_OUT, 0, PTX4_IN,
+		PTX3_FN, PTX3_OUT, 0, PTX3_IN,
+		PTX2_FN, PTX2_OUT, 0, PTX2_IN,
+		PTX1_FN, PTX1_OUT, 0, PTX1_IN,
+		PTX0_FN, PTX0_OUT, 0, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		PTY7_FN, PTY7_OUT, 0, PTY7_IN,
+		PTY6_FN, PTY6_OUT, 0, PTY6_IN,
+		PTY5_FN, PTY5_OUT, 0, PTY5_IN,
+		PTY4_FN, PTY4_OUT, 0, PTY4_IN,
+		PTY3_FN, PTY3_OUT, 0, PTY3_IN,
+		PTY2_FN, PTY2_OUT, 0, PTY2_IN,
+		PTY1_FN, PTY1_OUT, 0, PTY1_IN,
+		PTY0_FN, PTY0_OUT, 0, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		PTZ7_FN, PTZ7_OUT, 0, PTZ7_IN,
+		PTZ6_FN, PTZ6_OUT, 0, PTZ6_IN,
+		PTZ5_FN, PTZ5_OUT, 0, PTZ5_IN,
+		PTZ4_FN, PTZ4_OUT, 0, PTZ4_IN,
+		PTZ3_FN, PTZ3_OUT, 0, PTZ3_IN,
+		PTZ2_FN, PTZ2_OUT, 0, PTZ2_IN,
+		PTZ1_FN, PTZ1_OUT, 0, PTZ1_IN,
+		PTZ0_FN, PTZ0_OUT, 0, PTZ0_IN }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 2) {
+		PSA15_PSA14_FN1, PSA15_PSA14_FN2, 0, 0,
+		PSA13_PSA12_FN1, PSA13_PSA12_FN2, 0, 0,
+		PSA11_PSA10_FN1, PSA11_PSA10_FN2, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PSA5_PSA4_FN1, PSA5_PSA4_FN2, PSA5_PSA4_FN3, 0,
+		PSA3_PSA2_FN1, PSA3_PSA2_FN2, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 2) {
+		PSB15_PSB14_FN1, PSB15_PSB14_FN2, 0, 0,
+		PSB13_PSB12_LCDC_RGB, PSB13_PSB12_LCDC_SYS, 0, 0,
+		0, 0, 0, 0,
+		PSB9_PSB8_FN1, PSB9_PSB8_FN2, PSB9_PSB8_FN3, 0,
+		PSB7_PSB6_FN1, PSB7_PSB6_FN2, 0, 0,
+		PSB5_PSB4_FN1, PSB5_PSB4_FN2, 0, 0,
+		PSB3_PSB2_FN1, PSB3_PSB2_FN2, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 2) {
+		PSC15_PSC14_FN1, PSC15_PSC14_FN2, 0, 0,
+		PSC13_PSC12_FN1, PSC13_PSC12_FN2, 0, 0,
+		PSC11_PSC10_FN1, PSC11_PSC10_FN2, PSC11_PSC10_FN3, 0,
+		PSC9_PSC8_FN1, PSC9_PSC8_FN2, 0, 0,
+		PSC7_PSC6_FN1, PSC7_PSC6_FN2, PSC7_PSC6_FN3, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 2) {
+		PSD15_PSD14_FN1, PSD15_PSD14_FN2, 0, 0,
+		PSD13_PSD12_FN1, PSD13_PSD12_FN2, 0, 0,
+		PSD11_PSD10_FN1, PSD11_PSD10_FN2, PSD11_PSD10_FN3, 0,
+		PSD9_PSD8_FN1, PSD9_PSD8_FN2, 0, 0,
+		PSD7_PSD6_FN1, PSD7_PSD6_FN2, 0, 0,
+		PSD5_PSD4_FN1, PSD5_PSD4_FN2, 0, 0,
+		PSD3_PSD2_FN1, PSD3_PSD2_FN2, 0, 0,
+		PSD1_PSD0_FN1, PSD1_PSD0_FN2, 0, 0 }
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		0, 0, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0, 0, PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, 0, PTJ5_DATA, 0,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		0, 0, 0, 0,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		PTS7_DATA, PTS6_DATA, PTS5_DATA, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) {
+		0, 0, PTT5_DATA, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		0, 0, PTU5_DATA, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7723_pinmux_info = {
+	.name = "sh7723_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_IDEA0,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7723_pinmux_info);
+}
+
+arch_initcall(plat_pinmux_setup);
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index e5e06845fa4..b8869aa20de 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -1,7 +1,7 @@
 /*
  * SH-X3 SMP
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2008  Paul Mundt
  *  Copyright (C) 2007  Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +14,22 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 
+static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
+{
+	unsigned int message = (unsigned int)(long)arg;
+	unsigned int cpu = hard_smp_processor_id();
+	unsigned int offs = 4 * cpu;
+	unsigned int x;
+
+	x = ctrl_inl(0xfe410070 + offs); /* C0INTICI..CnINTICI */
+	x &= (1 << (message << 2));
+	ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
+
+	smp_message_recv(message);
+
+	return IRQ_HANDLED;
+}
+
 void __init plat_smp_setup(void)
 {
 	unsigned int cpu = 0;
@@ -40,6 +56,13 @@ void __init plat_smp_setup(void)
 
 void __init plat_prepare_cpus(unsigned int max_cpus)
 {
+	int i;
+
+	BUILD_BUG_ON(SMP_MSG_NR >= 8);
+
+	for (i = 0; i < SMP_MSG_NR; i++)
+		request_irq(104 + i, ipi_interrupt_handler, IRQF_DISABLED,
+			    "IPI", (void *)(long)i);
 }
 
 #define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12))
@@ -59,7 +82,7 @@ void plat_start_cpu(unsigned int cpu, unsigned long entry_point)
 		ctrl_outl(STBCR_MSTP, STBCR_REG(cpu));
 
 	while (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP))
-		;
+		cpu_relax();
 
 	/* Start up secondary processor by sending a reset */
 	ctrl_outl(STBCR_AP_VAL, STBCR_REG(cpu));
@@ -75,46 +98,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message)
 	unsigned long addr = 0xfe410070 + (cpu * 4);
 
 	BUG_ON(cpu >= 4);
-	BUG_ON(message >= SMP_MSG_NR);
 
 	ctrl_outl(1 << (message << 2), addr); /* C0INTICI..CnINTICI */
 }
-
-struct ipi_data {
-	void (*handler)(void *);
-	void *arg;
-	unsigned int message;
-};
-
-static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
-{
-	struct ipi_data *id = arg;
-	unsigned int cpu = hard_smp_processor_id();
-	unsigned int offs = 4 * cpu;
-	unsigned int x;
-
-	x = ctrl_inl(0xfe410070 + offs); /* C0INITICI..CnINTICI */
-	x &= (1 << (id->message << 2));
-	ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
-
-	id->handler(id->arg);
-
-	return IRQ_HANDLED;
-}
-
-static struct ipi_data ipi_handlers[SMP_MSG_NR];
-
-int plat_register_ipi_handler(unsigned int message,
-			      void (*handler)(void *), void *arg)
-{
-	struct ipi_data *id = &ipi_handlers[message];
-
-	BUG_ON(SMP_MSG_NR >= 8);
-	BUG_ON(message >= SMP_MSG_NR);
-
-	id->handler = handler;
-	id->arg = arg;
-	id->message = message;
-
-	return request_irq(104 + message, ipi_interrupt_handler, 0, "IPI", id);
-}
diff --git a/arch/sh/kernel/cpu/sh5/Makefile b/arch/sh/kernel/cpu/sh5/Makefile
index 8646363e9de..ce4602ea23a 100644
--- a/arch/sh/kernel/cpu/sh5/Makefile
+++ b/arch/sh/kernel/cpu/sh5/Makefile
@@ -5,3 +5,8 @@ obj-y := entry.o probe.o switchto.o
 
 obj-$(CONFIG_SH_FPU)		+= fpu.o
 obj-$(CONFIG_KALLSYMS)		+= unwind.o
+
+# Primary on-chip clocks (common)
+clock-$(CONFIG_CPU_SH5)		:= clock-sh5.o
+
+obj-y			+= $(clock-y)
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
new file mode 100644
index 00000000000..52c49248833
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -0,0 +1,79 @@
+/*
+ * arch/sh/kernel/cpu/sh5/clock-sh5.c
+ *
+ * SH-5 support for the clock framework
+ *
+ *  Copyright (C) 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/clock.h>
+#include <asm/io.h>
+
+static int ifc_table[] = { 2, 4, 6, 8, 10, 12, 16, 24 };
+
+/* Clock, Power and Reset Controller */
+#define	CPRC_BLOCK_OFF	0x01010000
+#define CPRC_BASE	(PHYS_PERIPHERAL_BLOCK + CPRC_BLOCK_OFF)
+
+static unsigned long cprc_base;
+
+static void master_clk_init(struct clk *clk)
+{
+	int idx = (ctrl_inl(cprc_base + 0x00) >> 6) & 0x0007;
+	clk->rate *= ifc_table[idx];
+}
+
+static struct clk_ops sh5_master_clk_ops = {
+	.init		= master_clk_init,
+};
+
+static void module_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) >> 12) & 0x0007;
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_module_clk_ops = {
+	.recalc		= module_clk_recalc,
+};
+
+static void bus_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) >> 3) & 0x0007;
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_bus_clk_ops = {
+	.recalc		= bus_clk_recalc,
+};
+
+static void cpu_clk_recalc(struct clk *clk)
+{
+	int idx = (ctrl_inw(cprc_base) & 0x0007);
+	clk->rate = clk->parent->rate / ifc_table[idx];
+}
+
+static struct clk_ops sh5_cpu_clk_ops = {
+	.recalc		= cpu_clk_recalc,
+};
+
+static struct clk_ops *sh5_clk_ops[] = {
+	&sh5_master_clk_ops,
+	&sh5_module_clk_ops,
+	&sh5_bus_clk_ops,
+	&sh5_cpu_clk_ops,
+};
+
+void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
+{
+	if (!cprc_base)	/* map the CPRC block once, not on every call */
+		cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
+	BUG_ON(!cprc_base);
+	if (idx < ARRAY_SIZE(sh5_clk_ops))
+		*ops = sh5_clk_ops[idx];
+}
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index 4a2ecbe27d8..95d21625556 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -10,6 +10,9 @@
 #include <linux/io.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/sh/kernel/dump_task.c b/arch/sh/kernel/dump_task.c
deleted file mode 100644
index 1db7ce0f25d..00000000000
--- a/arch/sh/kernel/dump_task.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <linux/elfcore.h>
-#include <linux/sched.h>
-#include <asm/fpu.h>
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs ptregs;
-
-	ptregs = *task_pt_regs(tsk);
-	elf_core_copy_regs(regs, &ptregs);
-
-	return 1;
-}
-
-int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpu)
-{
-	int fpvalid = 0;
-
-#if defined(CONFIG_SH_FPU)
-	fpvalid = !!tsk_used_math(tsk);
-	if (fpvalid) {
-		unlazy_fpu(tsk, task_pt_regs(tsk));
-		memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
-	}
-#endif
-
-	return fpvalid;
-}
-
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index efbb4268875..1a5cf9dd82d 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -371,3 +371,47 @@ syscall_exit:
 #endif
 7:	.long	do_syscall_trace_enter
 8:	.long	do_syscall_trace_leave
+
+#ifdef CONFIG_FTRACE
+	.align 2
+	.globl	_mcount
+	.type	_mcount,@function
+	.globl	mcount
+	.type	mcount,@function
+_mcount:
+mcount:
+	mov.l	r4, @-r15
+	mov.l	r5, @-r15
+	mov.l	r6, @-r15
+	mov.l	r7, @-r15
+	sts.l	pr, @-r15
+
+	mov.l	@(20,r15),r4
+	sts	pr, r5
+
+	mov.l	1f, r6
+	mov.l	ftrace_stub, r7	
+	cmp/eq	r6, r7
+	bt	skip_trace
+
+	mov.l	@r6, r6
+	jsr	@r6
+	 nop
+
+skip_trace:
+
+	lds.l	@r15+, pr
+	mov.l	@r15+, r7
+	mov.l	@r15+, r6
+	mov.l	@r15+, r5
+	rts
+	 mov.l	@r15+, r4
+
+	.align 2
+1:	.long	ftrace_trace_function
+
+	.globl	ftrace_stub
+ftrace_stub:
+	rts
+	 nop
+#endif /* CONFIG_FTRACE */
diff --git a/arch/sh/kernel/gpio.c b/arch/sh/kernel/gpio.c
new file mode 100644
index 00000000000..bb8b812c689
--- /dev/null
+++ b/arch/sh/kernel/gpio.c
@@ -0,0 +1,498 @@
+/*
+ * Pinmuxed GPIO support for SuperH.
+ *
+ * Copyright (C) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+
+static struct pinmux_info *registered_gpio;
+
+static struct pinmux_info *gpio_controller(unsigned gpio)
+{
+	if (!registered_gpio)
+		return NULL;
+
+	if (gpio < registered_gpio->first_gpio)
+		return NULL;
+
+	if (gpio > registered_gpio->last_gpio)
+		return NULL;
+
+	return registered_gpio;
+}
+
+static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
+{
+	if (enum_id < r->begin)
+		return 0;
+
+	if (enum_id > r->end)
+		return 0;
+
+	return 1;
+}
+
+static int read_write_reg(unsigned long reg, unsigned long reg_width,
+			  unsigned long field_width, unsigned long in_pos,
+			  unsigned long value, int do_write)
+{
+	unsigned long data, mask, pos;
+
+	data = 0;
+	mask = (1 << field_width) - 1;
+	pos = reg_width - ((in_pos + 1) * field_width);
+
+#ifdef DEBUG
+	pr_info("%s, addr = %lx, value = %ld, pos = %ld, "
+		"r_width = %ld, f_width = %ld\n",
+		do_write ? "write" : "read", reg, value, pos,
+		reg_width, field_width);
+#endif
+
+	switch (reg_width) {
+	case 8:
+		data = ctrl_inb(reg);
+		break;
+	case 16:
+		data = ctrl_inw(reg);
+		break;
+	case 32:
+		data = ctrl_inl(reg);
+		break;
+	}
+
+	if (!do_write)
+		return (data >> pos) & mask;
+
+	data &= ~(mask << pos);
+	data |= value << pos;
+
+	switch (reg_width) {
+	case 8:
+		ctrl_outb(data, reg);
+		break;
+	case 16:
+		ctrl_outw(data, reg);
+		break;
+	case 32:
+		ctrl_outl(data, reg);
+		break;
+	}
+	return 0;
+}
+
+static int get_data_reg(struct pinmux_info *gpioc, unsigned gpio,
+			struct pinmux_data_reg **drp, int *bitp)
+{
+	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
+	struct pinmux_data_reg *data_reg;
+	int k, n;
+
+	if (!enum_in_range(enum_id, &gpioc->data))
+		return -1;
+
+	k = 0;
+	while (1) {
+		data_reg = gpioc->data_regs + k;
+
+		if (!data_reg->reg_width)
+			break;
+
+		for (n = 0; n < data_reg->reg_width; n++) {
+			if (data_reg->enum_ids[n] == enum_id) {
+				*drp = data_reg;
+				*bitp = n;
+				return 0;
+
+			}
+		}
+		k++;
+	}
+
+	return -1;
+}
+
+static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
+			  struct pinmux_cfg_reg **crp, int *indexp,
+			  unsigned long **cntp)
+{
+	struct pinmux_cfg_reg *config_reg;
+	unsigned long r_width, f_width;
+	int k, n;
+
+	k = 0;
+	while (1) {
+		config_reg = gpioc->cfg_regs + k;
+
+		r_width = config_reg->reg_width;
+		f_width = config_reg->field_width;
+
+		if (!r_width)
+			break;
+		for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) {
+			if (config_reg->enum_ids[n] == enum_id) {
+				*crp = config_reg;
+				*indexp = n;
+				*cntp = &config_reg->cnt[n / (1 << f_width)];
+				return 0;
+			}
+		}
+		k++;
+	}
+
+	return -1;
+}
+
+static int get_gpio_enum_id(struct pinmux_info *gpioc, unsigned gpio,
+			    int pos, pinmux_enum_t *enum_idp)
+{
+	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
+	pinmux_enum_t *data = gpioc->gpio_data;
+	int k;
+
+	if (!enum_in_range(enum_id, &gpioc->data)) {
+		if (!enum_in_range(enum_id, &gpioc->mark)) {
+			pr_err("non data/mark enum_id for gpio %d\n", gpio);
+			return -1;
+		}
+	}
+
+	if (pos) {
+		*enum_idp = data[pos + 1];
+		return pos + 1;
+	}
+
+	for (k = 0; k < gpioc->gpio_data_size; k++) {
+		if (data[k] == enum_id) {
+			*enum_idp = data[k + 1];
+			return k + 1;
+		}
+	}
+
+	pr_err("cannot locate data/mark enum_id for gpio %d\n", gpio);
+	return -1;
+}
+
+static int write_config_reg(struct pinmux_info *gpioc,
+			    struct pinmux_cfg_reg *crp,
+			    int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	return read_write_reg(crp->reg, crp->reg_width,
+			      crp->field_width, pos, value, 1);
+}
+
+static int check_config_reg(struct pinmux_info *gpioc,
+			    struct pinmux_cfg_reg *crp,
+			    int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	if (read_write_reg(crp->reg, crp->reg_width,
+			   crp->field_width, pos, 0, 0) == value)
+		return 0;
+
+	return -1;
+}
+
+enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
+
+int pinmux_config_gpio(struct pinmux_info *gpioc, unsigned gpio,
+		       int pinmux_type, int cfg_mode)
+{
+	struct pinmux_cfg_reg *cr = NULL;
+	pinmux_enum_t enum_id;
+	struct pinmux_range *range;
+	int in_range, pos, index;
+	unsigned long *cntp;
+
+	switch (pinmux_type) {
+
+	case PINMUX_TYPE_FUNCTION:
+		range = NULL;	/* only function-range enums are matched */
+		break;
+
+	case PINMUX_TYPE_OUTPUT:
+		range = &gpioc->output;
+		break;
+
+	case PINMUX_TYPE_INPUT:
+		range = &gpioc->input;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLUP:
+		range = &gpioc->input_pu;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		range = &gpioc->input_pd;
+		break;
+
+	default:
+		goto out_err;
+	}
+
+	pos = 0;
+	enum_id = 0;
+	index = 0;
+	while (1) {
+		pos = get_gpio_enum_id(gpioc, gpio, pos, &enum_id);
+		if (pos <= 0)
+			goto out_err;
+
+		if (!enum_id)	/* a zero enum_id ends the walk */
+			break;
+
+		in_range = enum_in_range(enum_id, &gpioc->function);
+		if (!in_range && range)
+			in_range = enum_in_range(enum_id, range);
+
+		if (!in_range)
+			continue;
+
+		if (get_config_reg(gpioc, enum_id, &cr, &index, &cntp) != 0)
+			goto out_err;
+
+		switch (cfg_mode) {
+		case GPIO_CFG_DRYRUN:	/* field unused or already set: next */
+			if (!*cntp || !check_config_reg(gpioc, cr, index))
+				continue;
+			break;	/* NOTE(review): leaves the switch only; no error */
+
+		case GPIO_CFG_REQ:
+			if (write_config_reg(gpioc, cr, index) != 0)
+				goto out_err;
+			*cntp = *cntp + 1;
+			break;
+
+		case GPIO_CFG_FREE:
+			*cntp = *cntp - 1;
+			break;
+		}
+	}
+
+	return 0;
+ out_err:
+	return -1;
+}
+
+static DEFINE_SPINLOCK(gpio_lock);
+
+int __gpio_request(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	struct pinmux_data_reg *dummy;
+	unsigned long flags;
+	int i, ret, pinmux_type;
+
+	ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if ((gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE) != PINMUX_TYPE_NONE)
+		goto err_unlock;
+
+	/* setup pin function here if no data is associated with pin */
+
+	if (get_data_reg(gpioc, gpio, &dummy, &i) != 0)
+		pinmux_type = PINMUX_TYPE_FUNCTION;
+	else
+		pinmux_type = PINMUX_TYPE_GPIO;
+
+	if (pinmux_type == PINMUX_TYPE_FUNCTION) {
+		if (pinmux_config_gpio(gpioc, gpio,
+				       pinmux_type,
+				       GPIO_CFG_DRYRUN) != 0)
+			goto err_unlock;
+
+		if (pinmux_config_gpio(gpioc, gpio,
+				       pinmux_type,
+				       GPIO_CFG_REQ) != 0)
+			BUG();
+	}
+
+	gpioc->gpios[gpio].flags = pinmux_type;
+
+	ret = 0;
+ err_unlock:
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(__gpio_request);
+
+void gpio_free(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int pinmux_type;
+
+	if (!gpioc)
+		return;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
+	pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
+	gpioc->gpios[gpio].flags = PINMUX_TYPE_NONE;
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+}
+EXPORT_SYMBOL(gpio_free);
+
+static int pinmux_direction(struct pinmux_info *gpioc,
+			    unsigned gpio, int new_pinmux_type)
+{
+	int ret, pinmux_type;
+
+	ret = -EINVAL;
+	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
+
+	switch (pinmux_type) {
+	case PINMUX_TYPE_GPIO:
+		break;
+	case PINMUX_TYPE_OUTPUT:
+	case PINMUX_TYPE_INPUT:
+	case PINMUX_TYPE_INPUT_PULLUP:
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
+		break;
+	default:
+		goto err_out;
+	}
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_DRYRUN) != 0)
+		goto err_out;
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_REQ) != 0)
+		BUG();
+
+	gpioc->gpios[gpio].flags = new_pinmux_type;
+
+	ret = 0;
+ err_out:
+	return ret;
+}
+
+int gpio_direction_input(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	ret = pinmux_direction(gpioc, gpio, PINMUX_TYPE_INPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(gpio_direction_input);
+
+/* Read (do_write == 0) or write the data-register bit backing @gpio. */
+static int __gpio_get_set_value(struct pinmux_info *gpioc,
+				unsigned gpio, int value, int do_write)
+{
+	struct pinmux_data_reg *dr = NULL;
+	int bit = 0;
+
+	if (get_data_reg(gpioc, gpio, &dr, &bit) != 0)
+		BUG();
+	else	/* !!value: a 1-bit field must not spill into its neighbour */
+		value = read_write_reg(dr->reg, dr->reg_width,
+				       1, bit, !!value, do_write);
+
+	return value;
+}
+
+int gpio_direction_output(unsigned gpio, int value)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	__gpio_get_set_value(gpioc, gpio, value, 1);
+	ret = pinmux_direction(gpioc, gpio, PINMUX_TYPE_OUTPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+EXPORT_SYMBOL(gpio_direction_output);
+
+int gpio_get_value(unsigned gpio)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+	int value = 0;
+
+	if (!gpioc)
+		BUG();
+	else {
+		spin_lock_irqsave(&gpio_lock, flags);
+		value = __gpio_get_set_value(gpioc, gpio, 0, 0);
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+
+	return value;
+}
+EXPORT_SYMBOL(gpio_get_value);
+
+void gpio_set_value(unsigned gpio, int value)
+{
+	struct pinmux_info *gpioc = gpio_controller(gpio);
+	unsigned long flags;
+
+	if (!gpioc)
+		BUG();
+	else {
+		spin_lock_irqsave(&gpio_lock, flags);
+		__gpio_get_set_value(gpioc, gpio, value, 1);
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+}
+EXPORT_SYMBOL(gpio_set_value);
+
+int register_pinmux(struct pinmux_info *pip)
+{
+	registered_gpio = pip;
+	pr_info("pinmux: %s handling gpio %d -> %d\n",
+		pip->name, pip->first_gpio, pip->last_gpio);
+
+	return 0;
+}
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index 2b899122990..29cf4588fc0 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -19,12 +19,12 @@
  * Copy data from IO memory space to "real" memory space.
  * This needs to be optimized.
  */
-void memcpy_fromio(void *to, volatile void __iomem *from, unsigned long count)
+void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
 {
-	char *p = to;
+	unsigned char *p = to;
         while (count) {
                 count--;
-                *p = readb((void __iomem *)from);
+                *p = readb(from);
                 p++;
                 from++;
         }
@@ -37,10 +37,10 @@ EXPORT_SYMBOL(memcpy_fromio);
  */
 void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
 {
-	const char *p = from;
+	const unsigned char *p = from;
         while (count) {
                 count--;
-                writeb(*p, (void __iomem *)to);
+                writeb(*p, to);
                 p++;
                 to++;
         }
@@ -55,7 +55,7 @@ void memset_io(volatile void __iomem *dst, int c, unsigned long count)
 {
         while (count) {
                 count--;
-                writeb(c, (void __iomem *)dst);
+                writeb(c, dst);
                 dst++;
         }
 }
diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c
index db769449f5a..5a7f554d9ca 100644
--- a/arch/sh/kernel/io_generic.c
+++ b/arch/sh/kernel/io_generic.c
@@ -19,38 +19,33 @@
 /* SH3 has a PCMCIA bug that needs a dummy read from area 6 for a
  * workaround. */
 /* I'm not sure SH7709 has this kind of bug */
-#define dummy_read()	ctrl_inb(0xba000000)
+#define dummy_read()	__raw_readb(0xba000000)
 #else
 #define dummy_read()
 #endif
 
 unsigned long generic_io_base;
 
-static inline void delay(void)
-{
-	ctrl_inw(0xa0000000);
-}
-
 u8 generic_inb(unsigned long port)
 {
-	return ctrl_inb((unsigned long __force)__ioport_map(port, 1));
+	return __raw_readb(__ioport_map(port, 1));
 }
 
 u16 generic_inw(unsigned long port)
 {
-	return ctrl_inw((unsigned long __force)__ioport_map(port, 2));
+	return __raw_readw(__ioport_map(port, 2));
 }
 
 u32 generic_inl(unsigned long port)
 {
-	return ctrl_inl((unsigned long __force)__ioport_map(port, 4));
+	return __raw_readl(__ioport_map(port, 4));
 }
 
 u8 generic_inb_p(unsigned long port)
 {
 	unsigned long v = generic_inb(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -58,7 +53,7 @@ u16 generic_inw_p(unsigned long port)
 {
 	unsigned long v = generic_inw(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -66,7 +61,7 @@ u32 generic_inl_p(unsigned long port)
 {
 	unsigned long v = generic_inl(port);
 
-	delay();
+	ctrl_delay();
 	return v;
 }
 
@@ -81,7 +76,7 @@ void generic_insb(unsigned long port, void *dst, unsigned long count)
 	volatile u8 *port_addr;
 	u8 *buf = dst;
 
-	port_addr = (volatile u8 *)__ioport_map(port, 1);
+	port_addr = (volatile u8 __force *)__ioport_map(port, 1);
 	while (count--)
 		*buf++ = *port_addr;
 }
@@ -91,7 +86,7 @@ void generic_insw(unsigned long port, void *dst, unsigned long count)
 	volatile u16 *port_addr;
 	u16 *buf = dst;
 
-	port_addr = (volatile u16 *)__ioport_map(port, 2);
+	port_addr = (volatile u16 __force *)__ioport_map(port, 2);
 	while (count--)
 		*buf++ = *port_addr;
 
@@ -103,7 +98,7 @@ void generic_insl(unsigned long port, void *dst, unsigned long count)
 	volatile u32 *port_addr;
 	u32 *buf = dst;
 
-	port_addr = (volatile u32 *)__ioport_map(port, 4);
+	port_addr = (volatile u32 __force *)__ioport_map(port, 4);
 	while (count--)
 		*buf++ = *port_addr;
 
@@ -112,35 +107,35 @@ void generic_insl(unsigned long port, void *dst, unsigned long count)
 
 void generic_outb(u8 b, unsigned long port)
 {
-	ctrl_outb(b, (unsigned long __force)__ioport_map(port, 1));
+	__raw_writeb(b, __ioport_map(port, 1));
 }
 
 void generic_outw(u16 b, unsigned long port)
 {
-	ctrl_outw(b, (unsigned long __force)__ioport_map(port, 2));
+	__raw_writew(b, __ioport_map(port, 2));
 }
 
 void generic_outl(u32 b, unsigned long port)
 {
-	ctrl_outl(b, (unsigned long __force)__ioport_map(port, 4));
+	__raw_writel(b, __ioport_map(port, 4));
 }
 
 void generic_outb_p(u8 b, unsigned long port)
 {
 	generic_outb(b, port);
-	delay();
+	ctrl_delay();
 }
 
 void generic_outw_p(u16 b, unsigned long port)
 {
 	generic_outw(b, port);
-	delay();
+	ctrl_delay();
 }
 
 void generic_outl_p(u32 b, unsigned long port)
 {
 	generic_outl(b, port);
-	delay();
+	ctrl_delay();
 }
 
 /*
@@ -184,36 +179,6 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count)
 	dummy_read();
 }
 
-u8 generic_readb(void __iomem *addr)
-{
-	return ctrl_inb((unsigned long __force)addr);
-}
-
-u16 generic_readw(void __iomem *addr)
-{
-	return ctrl_inw((unsigned long __force)addr);
-}
-
-u32 generic_readl(void __iomem *addr)
-{
-	return ctrl_inl((unsigned long __force)addr);
-}
-
-void generic_writeb(u8 b, void __iomem *addr)
-{
-	ctrl_outb(b, (unsigned long __force)addr);
-}
-
-void generic_writew(u16 b, void __iomem *addr)
-{
-	ctrl_outw(b, (unsigned long __force)addr);
-}
-
-void generic_writel(u32 b, void __iomem *addr)
-{
-	ctrl_outl(b, (unsigned long __force)addr);
-}
-
 void __iomem *generic_ioport_map(unsigned long addr, unsigned int size)
 {
 	return (void __iomem *)(addr + generic_io_base);
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
new file mode 100644
index 00000000000..c96850b061f
--- /dev/null
+++ b/arch/sh/kernel/kprobes.c
@@ -0,0 +1,584 @@
+/*
+ * Kernel probes (kprobes) for SuperH
+ *
+ * Copyright (C) 2007 Chris Smith <chris.smith@st.com>
+ * Copyright (C) 2006 Lineo Solutions, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/kdebug.h>
+#include <asm/cacheflush.h>
+#include <asm/uaccess.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static struct kprobe saved_current_opcode;
+static struct kprobe saved_next_opcode;
+static struct kprobe saved_next_opcode2;
+
+#define OPCODE_JMP(x)	(((x) & 0xF0FF) == 0x402b)
+#define OPCODE_JSR(x)	(((x) & 0xF0FF) == 0x400b)
+#define OPCODE_BRA(x)	(((x) & 0xF000) == 0xa000)
+#define OPCODE_BRAF(x)	(((x) & 0xF0FF) == 0x0023)
+#define OPCODE_BSR(x)	(((x) & 0xF000) == 0xb000)
+#define OPCODE_BSRF(x)	(((x) & 0xF0FF) == 0x0003)
+/* Conditional branches with a delay slot: displacement in the low byte */
+#define OPCODE_BF_S(x)	(((x) & 0xFF00) == 0x8f00)
+#define OPCODE_BT_S(x)	(((x) & 0xFF00) == 0x8d00)
+/* Conditional branches without a delay slot: displacement in the low byte */
+#define OPCODE_BF(x)	(((x) & 0xFF00) == 0x8b00)
+#define OPCODE_BT(x)	(((x) & 0xFF00) == 0x8900)
+/* RTS and RTE carry no operand fields, so all sixteen bits must match */
+#define OPCODE_RTS(x)	(((x) & 0xFFFF) == 0x000b)
+#define OPCODE_RTE(x)	(((x) & 0xFFFF) == 0x002b)
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	kprobe_opcode_t opcode = *(kprobe_opcode_t *) (p->addr);
+
+	if (OPCODE_RTE(opcode))
+		return -EFAULT;	/* Bad breakpoint */
+
+	p->opcode = opcode;
+
+	return 0;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (*p->addr == BREAKPOINT_INSTRUCTION)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * If an illegal slot instruction exception occurs at @pc and the address one
+ * opcode past it (reported as pc + 2, the delay slot) has a kprobe, remove it.
+ *
+ * Returns 0 if a probe was found and unregistered, 1 otherwise.
+ */
+int __kprobes kprobe_handle_illslot(unsigned long pc)
+{
+	struct kprobe *p = get_kprobe((kprobe_opcode_t *) pc + 1);
+
+	if (p != NULL) {
+		printk("Warning: removing kprobe from delay slot: 0x%.8x\n",
+		       (unsigned int)pc + 2);
+		unregister_kprobe(p);
+		return 0;
+	}
+
+	return 1;
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (saved_next_opcode.addr != 0x0) {
+		arch_disarm_kprobe(p);
+		arch_disarm_kprobe(&saved_next_opcode);
+		saved_next_opcode.addr = 0x0;
+		saved_next_opcode.opcode = 0x0;
+
+		if (saved_next_opcode2.addr != 0x0) {
+			arch_disarm_kprobe(&saved_next_opcode2);
+			saved_next_opcode2.addr = 0x0;
+			saved_next_opcode2.opcode = 0x0;
+		}
+	}
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+					 struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+/*
+ * Singlestep: disarm the current kprobe and arm a temporary one on the next
+ * instruction, following branches (two if the branch is conditional). Branch
+ * displacements are signed, so they are sign-extended before being scaled.
+ */
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t *addr = NULL;
+	saved_current_opcode.addr = (kprobe_opcode_t *) (regs->pc);
+	addr = saved_current_opcode.addr;
+
+	if (p != NULL) {
+		arch_disarm_kprobe(p);
+
+		if (OPCODE_JSR(p->opcode) || OPCODE_JMP(p->opcode)) {
+			unsigned int reg_nr = ((p->opcode >> 8) & 0x000F);
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) regs->regs[reg_nr];
+		} else if (OPCODE_BRA(p->opcode) || OPCODE_BSR(p->opcode)) {
+			long disp = ((p->opcode & 0x0FFF) ^ 0x0800) - 0x0800;
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+
+		} else if (OPCODE_BRAF(p->opcode) || OPCODE_BSRF(p->opcode)) {
+			unsigned int reg_nr = ((p->opcode >> 8) & 0x000F);
+			saved_next_opcode.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 +
+						 regs->regs[reg_nr]);
+
+		} else if (OPCODE_RTS(p->opcode)) {
+			saved_next_opcode.addr = (kprobe_opcode_t *) regs->pr;
+
+		} else if (OPCODE_BF(p->opcode) || OPCODE_BT(p->opcode)) {
+			long disp = ((p->opcode & 0x00FF) ^ 0x0080) - 0x0080;
+			/* case 1: branch not taken, fall through */
+			saved_next_opcode.addr = p->addr + 1;
+			/* case 2: branch taken */
+			saved_next_opcode2.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+			saved_next_opcode2.opcode = *(saved_next_opcode2.addr);
+			arch_arm_kprobe(&saved_next_opcode2);
+
+		} else if (OPCODE_BF_S(p->opcode) || OPCODE_BT_S(p->opcode)) {
+			long disp = ((p->opcode & 0x00FF) ^ 0x0080) - 0x0080;
+			/* case 1: branch not taken, resume past the delay slot */
+			saved_next_opcode.addr = p->addr + 2;
+			/* case 2: branch taken */
+			saved_next_opcode2.addr =
+			    (kprobe_opcode_t *) (regs->pc + 4 + disp * 2);
+			saved_next_opcode2.opcode = *(saved_next_opcode2.addr);
+			arch_arm_kprobe(&saved_next_opcode2);
+
+		} else {
+			saved_next_opcode.addr = p->addr + 1;
+		}
+
+		saved_next_opcode.opcode = *(saved_next_opcode.addr);
+		arch_arm_kprobe(&saved_next_opcode);
+	}
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *) regs->pr;
+
+	/* Replace the return addr with trampoline addr */
+	regs->pr = (unsigned long)kretprobe_trampoline;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	addr = (kprobe_opcode_t *) (regs->pc);
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
+			    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		/* Not one of ours: let kernel handle it */
+		if (*(kprobe_opcode_t *)addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it. Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address. In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+
+		goto no_kprobe;
+	}
+
+	set_current_kprobe(p, regs, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * For function-return probes, init_kprobes() establishes a probepoint
+ * here. When a retprobed function returns, this probe is hit and
+ * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+	asm volatile (".globl kretprobe_trampoline\n"
+		      "kretprobe_trampoline:\n\t"
+		      "nop\n");
+}
+
+/*
+ * Called when we hit the probe point at kretprobe_trampoline
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler) {
+			__get_cpu_var(current_kprobe) = &ri->rp->kp;
+			ri->rp->handler(ri, regs);
+			__get_cpu_var(current_kprobe) = NULL;
+		}
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	regs->pc = orig_ret_address;
+	kretprobe_hash_unlock(current, &flags);
+
+	preempt_enable_no_resched();
+
+	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+
+	return orig_ret_address;
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe *p = NULL;
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	if (saved_next_opcode.addr != 0x0) {
+		arch_disarm_kprobe(&saved_next_opcode);
+		saved_next_opcode.addr = 0x0;
+		saved_next_opcode.opcode = 0x0;
+
+		addr = saved_current_opcode.addr;
+		saved_current_opcode.addr = 0x0;
+
+		p = get_kprobe(addr);
+		arch_arm_kprobe(p);
+
+		if (saved_next_opcode2.addr != 0x0) {
+			arch_disarm_kprobe(&saved_next_opcode2);
+			saved_next_opcode2.addr = 0x0;
+			saved_next_opcode2.opcode = 0x0;
+		}
+	}
+
+	/* Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+
+	reset_current_kprobe();
+
+out:
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe, point the pc back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->pc = (unsigned long)cur->addr;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if ((entry = search_exception_tables(regs->pc)) != NULL) {
+			regs->pc = entry->fixup;
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct kprobe *p = NULL;
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+	kprobe_opcode_t *addr = NULL;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	addr = (kprobe_opcode_t *) (args->regs->pc);
+	if (val == DIE_TRAP) {
+		if (!kprobe_running()) {
+			if (kprobe_handler(args->regs)) {
+				ret = NOTIFY_STOP;
+			} else {
+				/* Not a kprobe trap */
+				ret = NOTIFY_DONE;
+			}
+		} else {
+			p = get_kprobe(addr);
+			if ((kcb->kprobe_status == KPROBE_HIT_SS) ||
+			    (kcb->kprobe_status == KPROBE_REENTER)) {
+				if (post_kprobe_handler(args->regs))
+					ret = NOTIFY_STOP;
+			} else {
+				if (kprobe_handler(args->regs)) {
+					ret = NOTIFY_STOP;
+				} else {
+					p = __get_cpu_var(current_kprobe);
+					if (p->break_handler &&
+					    p->break_handler(p, args->regs))
+						ret = NOTIFY_STOP;
+				}
+			}
+		}
+	}
+
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	kcb->jprobe_saved_regs = *regs;
+	kcb->jprobe_saved_r15 = regs->regs[15];
+	addr = kcb->jprobe_saved_r15;
+
+	/*
+	 * TBD: As Linus pointed out, gcc assumes that the callee
+	 * owns the argument space and could overwrite it, e.g.
+	 * tailcall optimization. So, to be absolutely safe
+	 * we also save and restore enough stack bytes to cover
+	 * the argument area.
+	 */
+	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr,
+	       MIN_STACK_SIZE(addr));
+
+	regs->pc = (unsigned long)(jp->entry);
+
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	asm volatile ("trapa #0x3a\n\t" "jprobe_return_end:\n\t" "nop\n\t");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long stack_addr = kcb->jprobe_saved_r15;
+	u8 *addr = (u8 *)regs->pc;
+
+	if ((addr >= (u8 *)jprobe_return) &&
+	    (addr <= (u8 *)jprobe_return_end)) {
+		*regs = kcb->jprobe_saved_regs;
+
+		memcpy((kprobe_opcode_t *)stack_addr, kcb->jprobes_stack,
+		       MIN_STACK_SIZE(stack_addr));
+
+		kcb->kprobe_status = KPROBE_HIT_SS;
+		preempt_enable_no_resched();
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *)&kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	saved_next_opcode.addr = 0x0;
+	saved_next_opcode.opcode = 0x0;
+
+	saved_current_opcode.addr = 0x0;
+	saved_current_opcode.opcode = 0x0;
+
+	saved_next_opcode2.addr = 0x0;
+	saved_next_opcode2.opcode = 0x0;
+
+	return register_kprobe(&trampoline_p);
+}
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index 129b2cfd18a..c1ea41e5812 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <asm/machvec.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
@@ -125,9 +126,6 @@ void __init sh_mv_setup(void)
 	mv_set(insb);	mv_set(insw);	mv_set(insl);
 	mv_set(outsb);	mv_set(outsw);	mv_set(outsl);
 
-	mv_set(readb);	mv_set(readw);	mv_set(readl);
-	mv_set(writeb);	mv_set(writew);	mv_set(writel);
-
 	mv_set(ioport_map);
 	mv_set(ioport_unmap);
 	mv_set(irq_demux);
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 3326a45749d..b965f0282c7 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -7,7 +7,11 @@
  *
  *  SuperH version:  Copyright (C) 1999, 2000  Niibe Yutaka & Kaz Kojima
  *		     Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC
- *		     Copyright (C) 2002 - 2007  Paul Mundt
+ *		     Copyright (C) 2002 - 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -26,6 +30,7 @@
 #include <asm/system.h>
 #include <asm/ubc.h>
 #include <asm/fpu.h>
+#include <asm/syscalls.h>
 
 static int hlt_counter;
 int ubc_usercnt = 0;
@@ -111,15 +116,21 @@ void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
 	printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
+	printk("CPU : %d    %s  (%s %.*s)\n",
+	       smp_processor_id(), print_tainted(), init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version);
+
 	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("PR is at %s\n", regs->pr);
+
 	printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
 	       regs->pc, regs->regs[15], regs->sr);
 #ifdef CONFIG_MMU
-	printk("TEA : %08x    ", ctrl_inl(MMU_TEA));
+	printk("TEA : %08x\n", ctrl_inl(MMU_TEA));
 #else
-	printk("                  ");
+	printk("\n");
 #endif
-	printk("%s\n", print_tainted());
 
 	printk("R0  : %08lx R1  : %08lx R2  : %08lx R3  : %08lx\n",
 	       regs->regs[0],regs->regs[1],
@@ -162,6 +173,7 @@ __asm__(".align 5\n"
 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 {
 	struct pt_regs regs;
+	int pid;
 
 	memset(&regs, 0, sizeof(regs));
 	regs.regs[4] = (unsigned long)arg;
@@ -171,8 +183,12 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.sr = (1 << 30);
 
 	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		       &regs, 0, NULL, NULL);
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+		      &regs, 0, NULL, NULL);
+
+	trace_mark(kernel_arch_kthread_create, "pid %d fn %p", pid, fn);
+
+	return pid;
 }
 
 /*
@@ -210,10 +226,10 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 	struct task_struct *tsk = current;
 
 	fpvalid = !!tsk_used_math(tsk);
-	if (fpvalid) {
-		unlazy_fpu(tsk, regs);
-		memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
-	}
+	if (fpvalid)
+		fpvalid = !fpregs_get(tsk, NULL, 0,
+				      sizeof(struct user_fpu_struct),
+				      fpu, NULL);
 #endif
 
 	return fpvalid;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index b9dbd2d3b4a..b7aa09235b5 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/io.h>
+#include <asm/syscalls.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
@@ -395,6 +396,7 @@ ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 {
 	struct pt_regs regs;
+	int pid;
 
 	memset(&regs, 0, sizeof(regs));
 	regs.regs[2] = (unsigned long)arg;
@@ -403,8 +405,13 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.pc = (unsigned long)kernel_thread_helper;
 	regs.sr = (1 << 30);
 
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		       &regs, 0, NULL, NULL);
+	/* Ok, create the new process.. */
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+		      &regs, 0, NULL, NULL);
+
+	trace_mark(kernel_arch_kthread_create, "pid %d fn %p", pid, fn);
+
+	return pid;
 }
 
 /*
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 035cb300d3d..29ca09d24ef 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -1,12 +1,14 @@
 /*
- * linux/arch/sh/kernel/ptrace.c
+ * SuperH process tracing
  *
- * Original x86 implementation:
- *	By Ross Biro 1/23/92
- *	edited by Linus Torvalds
+ * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
+ * Copyright (C) 2002 - 2008  Paul Mundt
  *
- * SuperH version:   Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
- * Audit support: Yuichi Nakamura <ynakam@hitachisoft.jp>
+ * Audit support by Yuichi Nakamura <ynakam@hitachisoft.jp>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -22,16 +24,15 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
+#include <asm/syscalls.h>
+#include <asm/fpu.h>
 
 /*
  * This routine will get a word off of the process kernel stack.
@@ -61,16 +62,12 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 
 void user_enable_single_step(struct task_struct *child)
 {
-	struct pt_regs *regs = task_pt_regs(child);
-	long pc;
-
-	pc = get_stack_long(child, (long)&regs->pc);
-
 	/* Next scheduling will set up UBC */
 	if (child->thread.ubc_pc == 0)
 		ubc_usercnt += 1;
 
-	child->thread.ubc_pc = pc;
+	child->thread.ubc_pc = get_stack_long(child,
+				offsetof(struct pt_regs, pc));
 
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 }
@@ -102,9 +99,213 @@ void ptrace_disable(struct task_struct *child)
 	user_disable_single_step(child);
 }
 
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->regs,
+				  0, 16 * sizeof(unsigned long));
+	if (!ret)
+		/* PC, PR, SR, GBR, MACH, MACL, TRA */
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &regs->pc,
+					  offsetof(struct pt_regs, pc),
+					  sizeof(struct pt_regs));
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       sizeof(struct pt_regs), -1);
+
+	return ret;
+}
+
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->regs,
+				 0, 16 * sizeof(unsigned long));
+	if (!ret && count > 0)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &regs->pc,
+					 offsetof(struct pt_regs, pc),
+					 sizeof(struct pt_regs));
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						sizeof(struct pt_regs), -1);
+
+	return ret;
+}
+
+#ifdef CONFIG_SH_FPU
+int fpregs_get(struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	if ((boot_cpu_data.flags & CPU_HAS_FPU))
+		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					   &target->thread.fpu.hard, 0, -1);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.fpu.soft, 0, -1);
+}
+
+static int fpregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	set_stopped_child_used_math(target);
+
+	if ((boot_cpu_data.flags & CPU_HAS_FPU))
+		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.fpu.hard, 0, -1);
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.soft, 0, -1);
+}
+
+static int fpregs_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	return tsk_used_math(target) ? regset->n : 0;
+}
+#endif
+
+#ifdef CONFIG_SH_DSP
+static int dspregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_dspregs *regs = task_pt_dspregs(target);
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs,
+				  0, sizeof(struct pt_dspregs));
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       sizeof(struct pt_dspregs), -1);
+
+	return ret;
+}
+
+static int dspregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct pt_dspregs *regs = task_pt_dspregs(target);
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs,
+				 0, sizeof(struct pt_dspregs));
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						sizeof(struct pt_dspregs), -1);
+
+	return ret;
+}
+
+static int dspregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+
+	return regs->sr & SR_DSP ? regset->n : 0;
+}
+#endif
+
+/*
+ * These are our native regset flavours.
+ */
+enum sh_regset {
+	REGSET_GENERAL,
+#ifdef CONFIG_SH_FPU
+	REGSET_FPU,
+#endif
+#ifdef CONFIG_SH_DSP
+	REGSET_DSP,
+#endif
+};
+
+static const struct user_regset sh_regsets[] = {
+	/*
+	 * Format is:
+	 *	R0 --> R15
+	 *	PC, PR, SR, GBR, MACH, MACL, TRA
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+
+#ifdef CONFIG_SH_FPU
+	[REGSET_FPU] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct user_fpu_struct) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpregs_get,
+		.set		= fpregs_set,
+		.active		= fpregs_active,
+	},
+#endif
+
+#ifdef CONFIG_SH_DSP
+	[REGSET_DSP] = {
+		.n		= sizeof(struct pt_dspregs) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= dspregs_get,
+		.set		= dspregs_set,
+		.active		= dspregs_active,
+	},
+#endif
+};
+
+static const struct user_regset_view user_sh_native_view = {
+	.name		= "sh",
+	.e_machine	= EM_SH,
+	.regsets	= sh_regsets,
+	.n		= ARRAY_SIZE(sh_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_sh_native_view;
+}
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	struct user * dummy = NULL;
+	unsigned long __user *datap = (unsigned long __user *)data;
 	int ret;
 
 	switch (request) {
@@ -133,7 +334,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			tmp = !!tsk_used_math(child);
 		else
 			tmp = 0;
-		ret = put_user(tmp, (unsigned long __user *)data);
+		ret = put_user(tmp, datap);
 		break;
 	}
 
@@ -157,34 +358,39 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		break;
 
+	case PTRACE_GETREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_GENERAL,
+					   0, sizeof(struct pt_regs),
+					   (void __user *)data);
+	case PTRACE_SETREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_GENERAL,
+					     0, sizeof(struct pt_regs),
+					     (const void __user *)data);
+#ifdef CONFIG_SH_FPU
+	case PTRACE_GETFPREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_FPU,
+					   0, sizeof(struct user_fpu_struct),
+					   (void __user *)data);
+	case PTRACE_SETFPREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_FPU,
+					     0, sizeof(struct user_fpu_struct),
+					     (const void __user *)data);
+#endif
 #ifdef CONFIG_SH_DSP
-	case PTRACE_GETDSPREGS: {
-		unsigned long dp;
-
-		ret = -EIO;
-		dp = ((unsigned long) child) + THREAD_SIZE -
-			 sizeof(struct pt_dspregs);
-		if (*((int *) (dp - 4)) == SR_FD) {
-			copy_to_user((void *)addr, (void *) dp,
-				sizeof(struct pt_dspregs));
-			ret = 0;
-		}
-		break;
-	}
-
-	case PTRACE_SETDSPREGS: {
-		unsigned long dp;
-
-		ret = -EIO;
-		dp = ((unsigned long) child) + THREAD_SIZE -
-			 sizeof(struct pt_dspregs);
-		if (*((int *) (dp - 4)) == SR_FD) {
-			copy_from_user((void *) dp, (void *)addr,
-				sizeof(struct pt_dspregs));
-			ret = 0;
-		}
-		break;
-	}
+	case PTRACE_GETDSPREGS:
+		return copy_regset_to_user(child, &user_sh_native_view,
+					   REGSET_DSP,
+					   0, sizeof(struct pt_dspregs),
+					   (void __user *)data);
+	case PTRACE_SETDSPREGS:
+		return copy_regset_from_user(child, &user_sh_native_view,
+					     REGSET_DSP,
+					     0, sizeof(struct pt_dspregs),
+					     (const void __user *)data);
 #endif
 #ifdef CONFIG_BINFMT_ELF_FDPIC
 	case PTRACE_GETFDPIC: {
@@ -202,7 +408,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 
 		ret = 0;
-		if (put_user(tmp, (unsigned long *) data)) {
+		if (put_user(tmp, datap)) {
 			ret = -EFAULT;
 			break;
 		}
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 9c6424892bd..e15b099c1f0 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -35,6 +35,7 @@
 #include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 #include <asm/fpu.h>
 
 /* This mask defines the bits of the SR which the user is not allowed to
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de832056bf1..e7152cc6930 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -26,6 +26,9 @@
 #include <linux/err.h>
 #include <linux/debugfs.h>
 #include <linux/crash_dump.h>
+#include <linux/mmzone.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
@@ -144,6 +147,7 @@ static void __init reserve_crashkernel(void)
 {
 	unsigned long long free_mem;
 	unsigned long long crash_size, crash_base;
+	void *vp;
 	int ret;
 
 	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
@@ -152,12 +156,14 @@ static void __init reserve_crashkernel(void)
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size) {
 		if (crash_base <= 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"you have to specify a base address\n");
-			return;
-		}
-
-		if (reserve_bootmem(crash_base, crash_size,
+			vp = alloc_bootmem_nopanic(crash_size); 
+			if (!vp) {
+				printk(KERN_INFO "crashkernel allocation "
+				       "failed\n");
+				return;
+			}
+			crash_base = __pa(vp);
+		} else if (reserve_bootmem(crash_base, crash_size,
 					BOOTMEM_EXCLUSIVE) < 0) {
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"memory is in use\n");
@@ -179,6 +185,24 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
+#ifndef CONFIG_GENERIC_CALIBRATE_DELAY
+void __cpuinit calibrate_delay(void)
+{
+	struct clk *clk = clk_get(NULL, "cpu_clk");
+
+	if (IS_ERR(clk))
+		panic("Need a sane CPU clock definition!");
+
+	loops_per_jiffy = (clk_get_rate(clk) >> 1) / HZ;
+
+	printk(KERN_INFO "Calibrating delay loop (skipped)... "
+			 "%lu.%02lu BogoMIPS PRESET (lpj=%lu)\n",
+			 loops_per_jiffy/(500000/HZ),
+			 (loops_per_jiffy/(5000/HZ)) % 100,
+			 loops_per_jiffy);
+}
+#endif
+
 void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 						unsigned long end_pfn)
 {
@@ -232,15 +256,17 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	 * case of us accidentally initializing the bootmem allocator with
 	 * an invalid RAM area.
 	 */
-	reserve_bootmem(__MEMORY_START+PAGE_SIZE,
-		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START,
-		BOOTMEM_DEFAULT);
+	reserve_bootmem(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
+			(PFN_PHYS(free_pfn) + bootmap_size + PAGE_SIZE - 1) -
+			(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET),
+			BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(__MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
+	reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET,
+			BOOTMEM_DEFAULT);
 
 	sparse_memory_present_with_active_regions(0);
 
@@ -248,17 +274,18 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	ROOT_DEV = Root_RAM0;
 
 	if (LOADER_TYPE && INITRD_START) {
-		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START + __MEMORY_START,
-					INITRD_SIZE, BOOTMEM_DEFAULT);
-			initrd_start = INITRD_START + PAGE_OFFSET +
-					__MEMORY_START;
+		unsigned long initrd_start_phys = INITRD_START + __MEMORY_START;
+
+		if (initrd_start_phys + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) {
+			reserve_bootmem(initrd_start_phys, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
+			initrd_start = (unsigned long)__va(initrd_start_phys);
 			initrd_end = initrd_start + INITRD_SIZE;
 		} else {
 			printk("initrd extends beyond end of memory "
-			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-				    INITRD_START + INITRD_SIZE,
-				    max_low_pfn << PAGE_SHIFT);
+			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+			       initrd_start_phys + INITRD_SIZE,
+			       (unsigned long)PFN_PHYS(max_low_pfn));
 			initrd_start = 0;
 		}
 	}
@@ -530,6 +557,8 @@ struct dentry *sh_debugfs_root;
 static int __init sh_debugfs_init(void)
 {
 	sh_debugfs_root = debugfs_create_dir("sh", NULL);
+	if (!sh_debugfs_root)
+		return -ENOMEM;
 	if (IS_ERR(sh_debugfs_root))
 		return PTR_ERR(sh_debugfs_root);
 
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 6e1b1c27165..d917b7b4042 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -16,6 +16,7 @@
 #include <asm/delay.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/ftrace.h>
 
 extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
 extern struct hw_interrupt_type no_irq_type;
@@ -133,6 +134,9 @@ EXPORT_SYMBOL(__flush_purge_region);
 EXPORT_SYMBOL(clear_user_page);
 #endif
 
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(mcount);
+#endif
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
 #ifdef CONFIG_IPV6
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 51689d29ad4..69d09c0b349 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -30,6 +30,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/fpu.h>
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -215,6 +216,9 @@ asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5,
 	sigset_t set;
 	int r0;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -247,9 +251,11 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->regs[15];
 	sigset_t set;
-	stack_t st;
 	int r0;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -265,11 +271,9 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
 		goto badframe;
 
-	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL,
+			   regs->regs[15]) == -EFAULT)
 		goto badframe;
-	/* It is more difficult to avoid calling this function than to
-	   call it and ignore errors.  */
-	do_sigaltstack((const stack_t __user *)&st, NULL, (unsigned long)frame);
 
 	return r0;
 
@@ -429,7 +433,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __put_user((void *)current->sas_ss_sp,
 			  &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->regs[15]),
@@ -492,37 +496,43 @@ give_sigsegv:
 	return -EFAULT;
 }
 
+static inline void
+handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs,
+		       struct sigaction *sa)
+{
+	/* If we're not from a syscall, bail out */
+	if (regs->tra < 0)
+		return;
+
+	/* check for system call restart.. */
+	switch (regs->regs[0]) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+		no_system_call_restart:
+			regs->regs[0] = -EINTR;
+			regs->sr |= 1;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(sa->sa_flags & SA_RESTART))
+				goto no_system_call_restart;
+		/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->regs[0] = save_r0;
+			regs->pc -= instruction_size(ctrl_inw(regs->pc - 4));
+			break;
+	}
+}
+
 /*
  * OK, we're invoking a handler
  */
-
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	      sigset_t *oldset, struct pt_regs *regs, unsigned int save_r0)
 {
 	int ret;
 
-	/* Are we from a system call? */
-	if (regs->tra >= 0) {
-		/* If so, check system call restarting.. */
-		switch (regs->regs[0]) {
-			case -ERESTART_RESTARTBLOCK:
-			case -ERESTARTNOHAND:
-			no_system_call_restart:
-				regs->regs[0] = -EINTR;
-				break;
-
-			case -ERESTARTSYS:
-				if (!(ka->sa.sa_flags & SA_RESTART))
-					goto no_system_call_restart;
-			/* fallthrough */
-			case -ERESTARTNOINTR:
-				regs->regs[0] = save_r0;
-				regs->pc -= instruction_size(
-						ctrl_inw(regs->pc - 4));
-				break;
-		}
-	}
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -580,6 +590,9 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
+		if (regs->sr & 1)
+			handle_syscall_restart(save_r0, regs, &ka.sa);
+
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &ka, &info, oldset,
 				  regs, save_r0) == 0) {
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 1d62dfef77f..ce3e851dffc 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -43,6 +43,10 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
+static void
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+		sigset_t *oldset, struct pt_regs * regs);
+
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -371,6 +375,9 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
 	sigset_t set;
 	long long ret;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -408,6 +415,9 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
 	stack_t __user st;
 	long long ret;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 
@@ -535,7 +545,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -550,10 +560,10 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 		 */
 		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
-			(unsigned long long)sa_default_restorer & (~1), 16) != 0)
+			(void *)((unsigned long)sa_default_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
 		/* Cohere the trampoline with the I-cache. */
@@ -566,7 +576,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 	 */
 	regs->regs[REG_SP] = (unsigned long) frame;
 	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 
         /* FIXME:
@@ -652,7 +662,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		 * On SH5 all edited pointers are subject to NEFF
 		 */
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 	} else {
 		/*
 		 * Different approach on SH5.
@@ -668,10 +678,10 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
 		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
-        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+			(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
 
 		if (__copy_to_user(frame->retcode,
-			(unsigned long long)sa_default_rt_restorer & (~1), 16) != 0)
+			(void *)((unsigned long)sa_default_rt_restorer & (~1)), 16) != 0)
 			goto give_sigsegv;
 
 		flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15);
@@ -683,7 +693,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 */
 	regs->regs[REG_SP] = (unsigned long) frame;
 	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
-        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
 	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 001778f9ada..508dfb02362 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -3,7 +3,7 @@
  *
  * SMP support for the SuperH processors.
  *
- * Copyright (C) 2002 - 2007 Paul Mundt
+ * Copyright (C) 2002 - 2008 Paul Mundt
  * Copyright (C) 2006 - 2007 Akio Idehara
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -86,9 +86,12 @@ asmlinkage void __cpuinit start_secondary(void)
 
 	local_irq_enable();
 
+	cpu = smp_processor_id();
+
+	/* Enable local timers */
+	local_timer_setup(cpu);
 	calibrate_delay();
 
-	cpu = smp_processor_id();
 	smp_store_cpu_info(cpu);
 
 	cpu_set(cpu, cpu_online_map);
@@ -186,6 +189,42 @@ void arch_send_call_function_single_ipi(int cpu)
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
+void smp_timer_broadcast(cpumask_t mask)
+{
+	int cpu;
+
+	for_each_cpu_mask(cpu, mask)
+		plat_send_ipi(cpu, SMP_MSG_TIMER);
+}
+
+static void ipi_timer(void)
+{
+	irq_enter();
+	local_timer_interrupt();
+	irq_exit();
+}
+
+void smp_message_recv(unsigned int msg)
+{
+	switch (msg) {
+	case SMP_MSG_FUNCTION:
+		generic_smp_call_function_interrupt();
+		break;
+	case SMP_MSG_RESCHEDULE:
+		break;
+	case SMP_MSG_FUNCTION_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+	case SMP_MSG_TIMER:
+		ipi_timer();
+		break;
+	default:
+		printk(KERN_WARNING "SMP %d: %s(): unknown IPI %d\n",
+		       smp_processor_id(), __func__, msg);
+		break;
+	}
+}
+
 /* Not really SMP stuff ... */
 int setup_profiling_timer(unsigned int multiplier)
 {
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index 54d1f61aa00..1a2a5eb76e4 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -3,7 +3,7 @@
  *
  * Stack trace management functions
  *
- *  Copyright (C) 2006  Paul Mundt
+ *  Copyright (C) 2006 - 2008  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -36,3 +36,24 @@ void save_stack_trace(struct stack_trace *trace)
 	}
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	unsigned long *sp = (unsigned long *)tsk->thread.sp;
+
+	while (!kstack_end(sp)) {
+		unsigned long addr = *sp++;
+
+		if (__kernel_text_address(addr)) {
+			if (in_sched_functions(addr))
+				break;
+			if (trace->skip > 0)
+				trace->skip--;
+			else
+				trace->entries[trace->nr_entries++] = addr;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 9061b86d73f..38f098c9c72 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 #include <linux/ipc.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
@@ -170,6 +171,8 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 	version = call >> 16; /* hack for backward compatibility */
 	call &= 0xffff;
 
+	trace_mark(kernel_arch_ipc_call, "call %u first %d", call, first);
+
 	if (call <= SEMTIMEDOP)
 		switch (call) {
 		case SEMOP:
@@ -186,7 +189,7 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 			union semun fourth;
 			if (!ptr)
 				return -EINVAL;
-			if (get_user(fourth.__pad, (void * __user *) ptr))
+			if (get_user(fourth.__pad, (void __user * __user *) ptr))
 				return -EFAULT;
 			return sys_semctl (first, second, third, fourth);
 			}
@@ -261,13 +264,13 @@ asmlinkage int sys_ipc(uint call, int first, int second,
 	return -EINVAL;
 }
 
-asmlinkage int sys_uname(struct old_utsname * name)
+asmlinkage int sys_uname(struct old_utsname __user *name)
 {
 	int err;
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index f0aa5c39865..dbba1e1833d 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -16,6 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include <asm/syscalls.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -37,13 +38,13 @@ asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
 	return error;
 }
 
-asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char * buf,
+asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf,
 			     size_t count, long dummy, loff_t pos)
 {
 	return sys_pread64(fd, buf, count, pos);
 }
 
-asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char * buf,
+asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf,
 			      size_t count, long dummy, loff_t pos)
 {
 	return sys_pwrite64(fd, buf, count, pos);
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index 0758b5ee818..23ca711c27d 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -1,9 +1,9 @@
 /*
- *  arch/sh/kernel/time.c
+ *  arch/sh/kernel/time_32.c
  *
  *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
  *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002 - 2007  Paul Mundt
+ *  Copyright (C) 2002 - 2008  Paul Mundt
  *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
  *
  *  Some code taken from i386 version.
@@ -16,6 +16,8 @@
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/clockchips.h>
+#include <linux/mc146818rtc.h>	/* for rtc_lock */
+#include <linux/smp.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
 #include <asm/timer.h>
@@ -253,6 +255,10 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 				-xtime.tv_sec, -xtime.tv_nsec);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
 	/*
 	 * Find the timer to use as the system timer, it will be
 	 * initialized for us.
@@ -260,6 +266,7 @@ void __init time_init(void)
 	sys_timer = get_sys_timer();
 	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
 
+
 	if (sys_timer->ops->read)
 		clocksource_sh.read = sys_timer->ops->read;
 
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
index 791edabf7d8..bbb2af1004d 100644
--- a/arch/sh/kernel/time_64.c
+++ b/arch/sh/kernel/time_64.c
@@ -39,6 +39,7 @@
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
+#include <asm/clock.h>
 
 #define TMU_TOCR_INIT	0x00
 #define TMU0_TCR_INIT	0x0020
@@ -51,14 +52,6 @@
 #define RTC_RCR1_CIE	0x10	/* Carry Interrupt Enable */
 #define RTC_RCR1	(rtc_base + 0x38)
 
-/* Clock, Power and Reset Controller */
-#define	CPRC_BLOCK_OFF	0x01010000
-#define CPRC_BASE	PHYS_PERIPHERAL_BLOCK + CPRC_BLOCK_OFF
-
-#define FRQCR		(cprc_base+0x0)
-#define WTCSR		(cprc_base+0x0018)
-#define STBCR		(cprc_base+0x0030)
-
 /* Time Management Unit */
 #define	TMU_BLOCK_OFF	0x01020000
 #define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
@@ -293,103 +286,17 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-
-static __init unsigned int get_cpu_hz(void)
-{
-	unsigned int count;
-	unsigned long __dummy;
-	unsigned long ctc_val_init, ctc_val;
-
-	/*
-	** Regardless the toolchain, force the compiler to use the
-	** arbitrary register r3 as a clock tick counter.
-	** NOTE: r3 must be in accordance with sh64_rtc_interrupt()
-	*/
-	register unsigned long long  __rtc_irq_flag __asm__ ("r3");
-
-	local_irq_enable();
-	do {} while (ctrl_inb(rtc_base) != 0);
-	ctrl_outb(RTC_RCR1_CIE, RTC_RCR1); /* Enable carry interrupt */
-
-	/*
-	 * r3 is arbitrary. CDC does not support "=z".
-	 */
-	ctc_val_init = 0xffffffff;
-	ctc_val = ctc_val_init;
-
-	asm volatile("gettr	tr0, %1\n\t"
-		     "putcon	%0, " __CTC "\n\t"
-		     "and	%2, r63, %2\n\t"
-		     "pta	$+4, tr0\n\t"
-		     "beq/l	%2, r63, tr0\n\t"
-		     "ptabs	%1, tr0\n\t"
-		     "getcon	" __CTC ", %0\n\t"
-		: "=r"(ctc_val), "=r" (__dummy), "=r" (__rtc_irq_flag)
-		: "0" (0));
-	local_irq_disable();
-	/*
-	 * SH-3:
-	 * CPU clock = 4 stages * loop
-	 * tst    rm,rm      if id ex
-	 * bt/s   1b            if id ex
-	 * add    #1,rd            if id ex
-         *                            (if) pipe line stole
-	 * tst    rm,rm                  if id ex
-         * ....
-	 *
-	 *
-	 * SH-4:
-	 * CPU clock = 6 stages * loop
-	 * I don't know why.
-         * ....
-	 *
-	 * SH-5:
-	 * Use CTC register to count.  This approach returns the right value
-	 * even if the I-cache is disabled (e.g. whilst debugging.)
-	 *
-	 */
-
-	count = ctc_val_init - ctc_val; /* CTC counts down */
-
-	/*
-	 * This really is count by the number of clock cycles
-         * by the ratio between a complete R64CNT
-         * wrap-around (128) and CUI interrupt being raised (64).
-	 */
-	return count*2;
-}
-
-static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id)
-{
-	struct pt_regs *regs = get_irq_regs();
-
-	ctrl_outb(0, RTC_RCR1);	/* Disable Carry Interrupts */
-	regs->regs[3] = 1;	/* Using r3 */
-
-	return IRQ_HANDLED;
-}
-
 static struct irqaction irq0  = {
 	.handler = timer_interrupt,
 	.flags = IRQF_DISABLED,
 	.mask = CPU_MASK_NONE,
 	.name = "timer",
 };
-static struct irqaction irq1  = {
-	.handler = sh64_rtc_interrupt,
-	.flags = IRQF_DISABLED,
-	.mask = CPU_MASK_NONE,
-	.name = "rtc",
-};
 
 void __init time_init(void)
 {
-	unsigned int cpu_clock, master_clock, bus_clock, module_clock;
 	unsigned long interval;
-	unsigned long frqcr, ifc, pfc;
-	static int ifc_table[] = { 2, 4, 6, 8, 10, 12, 16, 24 };
-#define bfc_table ifc_table	/* Same */
-#define pfc_table ifc_table	/* Same */
+	struct clk *clk;
 
 	tmu_base = onchip_remap(TMU_BASE, 1024, "TMU");
 	if (!tmu_base) {
@@ -401,50 +308,19 @@ void __init time_init(void)
 		panic("Unable to remap RTC\n");
 	}
 
-	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
-	if (!cprc_base) {
-		panic("Unable to remap CPRC\n");
-	}
+	clk = clk_get(NULL, "cpu_clk");
+	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) /
+			(unsigned long long)(clk_get_rate(clk) / HZ));
 
 	rtc_sh_get_time(&xtime);
 
 	setup_irq(TIMER_IRQ, &irq0);
-	setup_irq(RTC_IRQ, &irq1);
-
-	/* Check how fast it is.. */
-	cpu_clock = get_cpu_hz();
-
-	/* Note careful order of operations to maintain reasonable precision and avoid overflow. */
-	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) / (unsigned long long)(cpu_clock / HZ));
-
-	free_irq(RTC_IRQ, NULL);
-
-	printk("CPU clock: %d.%02dMHz\n",
-	       (cpu_clock / 1000000), (cpu_clock % 1000000)/10000);
-	{
-		unsigned short bfc;
-		frqcr = ctrl_inl(FRQCR);
-		ifc  = ifc_table[(frqcr>> 6) & 0x0007];
-		bfc  = bfc_table[(frqcr>> 3) & 0x0007];
-		pfc  = pfc_table[(frqcr>> 12) & 0x0007];
-		master_clock = cpu_clock * ifc;
-		bus_clock = master_clock/bfc;
-	}
 
-	printk("Bus clock: %d.%02dMHz\n",
-	       (bus_clock/1000000), (bus_clock % 1000000)/10000);
-	module_clock = master_clock/pfc;
-	printk("Module clock: %d.%02dMHz\n",
-	       (module_clock/1000000), (module_clock % 1000000)/10000);
-	interval = (module_clock/(HZ*4));
+	clk = clk_get(NULL, "module_clk");
+	interval = (clk_get_rate(clk)/(HZ*4));
 
 	printk("Interval = %ld\n", interval);
 
-	current_cpu_data.cpu_clock    = cpu_clock;
-	current_cpu_data.master_clock = master_clock;
-	current_cpu_data.bus_clock    = bus_clock;
-	current_cpu_data.module_clock = module_clock;
-
 	/* Start TMU0 */
 	ctrl_outb(TMU_TSTR_OFF, TMU_TSTR);
 	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
@@ -454,36 +330,6 @@ void __init time_init(void)
 	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
 }
 
-void enter_deep_standby(void)
-{
-	/* Disable watchdog timer */
-	ctrl_outl(0xa5000000, WTCSR);
-	/* Configure deep standby on sleep */
-	ctrl_outl(0x03, STBCR);
-
-#ifdef CONFIG_SH_ALPHANUMERIC
-	{
-		extern void mach_alphanum(int position, unsigned char value);
-		extern void mach_alphanum_brightness(int setting);
-		char halted[] = "Halted. ";
-		int i;
-		mach_alphanum_brightness(6); /* dimmest setting above off */
-		for (i=0; i<8; i++) {
-			mach_alphanum(i, halted[i]);
-		}
-		asm __volatile__ ("synco");
-	}
-#endif
-
-	asm __volatile__ ("sleep");
-	asm __volatile__ ("synci");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	asm __volatile__ ("nop");
-	panic("Unexpected wakeup!\n");
-}
-
 static struct resource rtc_resources[] = {
 	[0] = {
 		/* RTC base, filled in by rtc_init */
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index bcf244ff6a1..0b7f8577193 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
 obj-$(CONFIG_SH_MTU2)		+= timer-mtu2.o
 obj-$(CONFIG_SH_CMT)		+= timer-cmt.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
new file mode 100644
index 00000000000..c2317635230
--- /dev/null
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -0,0 +1,57 @@
+/*
+ * Dummy local timer
+ *
+ * Copyright (C) 2008  Paul Mundt
+ *
+ * cloned from:
+ *
+ *  linux/arch/arm/mach-realview/localtimer.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
+
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+
+/*
+ * Used on SMP for either the local timer or SMP_MSG_TIMER
+ */
+void local_timer_interrupt(void)
+{
+	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
+
+	clk->event_handler(clk);
+}
+
+static void dummy_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *clk)
+{
+}
+
+void __cpuinit local_timer_setup(unsigned int cpu)
+{
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+
+	clk->name		= "dummy_timer";
+	clk->features		= CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 200;
+	clk->mult		= 1;
+	clk->set_mode		= dummy_timer_set_mode;
+	clk->broadcast		= smp_timer_broadcast;
+	clk->cpumask		= cpumask_of_cpu(cpu);
+
+	clockevents_register_device(clk);
+}
diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c
index d20c8c37588..c127293271e 100644
--- a/arch/sh/kernel/timers/timer-cmt.c
+++ b/arch/sh/kernel/timers/timer-cmt.c
@@ -174,7 +174,7 @@ static int cmt_timer_init(void)
 	return 0;
 }
 
-struct sys_timer_ops cmt_timer_ops = {
+static struct sys_timer_ops cmt_timer_ops = {
 	.init		= cmt_timer_init,
 	.start		= cmt_timer_start,
 	.stop		= cmt_timer_stop,
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 1ca9ad49b54..aaaf90d06b8 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -28,43 +28,90 @@
 #define TMU_TOCR_INIT	0x00
 #define TMU_TCR_INIT	0x0020
 
-static int tmu_timer_start(void)
+#define TMU0		(0)
+#define TMU1		(1)
+
+static inline void _tmu_start(int tmu_num)
 {
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) | 0x3, TMU_012_TSTR);
-	return 0;
+	ctrl_outb(ctrl_inb(TMU_012_TSTR) | (0x1<<tmu_num), TMU_012_TSTR);
 }
 
-static void tmu0_timer_set_interval(unsigned long interval, unsigned int reload)
+static inline void _tmu_set_irq(int tmu_num, int enabled)
 {
-	ctrl_outl(interval, TMU0_TCNT);
+	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
+	ctrl_outw( (enabled ? ctrl_inw(tmu_tcr) | (1<<5) : ctrl_inw(tmu_tcr) & ~(1<<5)), tmu_tcr);
+}
 
-	/*
-	 * TCNT reloads from TCOR on underflow, clear it if we don't
-	 * intend to auto-reload
-	 */
-	if (reload)
-		ctrl_outl(interval, TMU0_TCOR);
-	else
-		ctrl_outl(0, TMU0_TCOR);
+static inline void _tmu_stop(int tmu_num)
+{
+	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~(0x1<<tmu_num), TMU_012_TSTR);
+}
+
+static inline void _tmu_clear_status(int tmu_num)
+{
+	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
+	/* Clear UNF bit */
+	ctrl_outw(ctrl_inw(tmu_tcr) & ~0x100, tmu_tcr);
+}
 
-	tmu_timer_start();
+static inline unsigned long _tmu_read(int tmu_num)
+{
+        return ctrl_inl(TMU0_TCNT+0xC*tmu_num);
+}
+
+static int tmu_timer_start(void)
+{
+	_tmu_start(TMU0);
+	_tmu_start(TMU1);
+	_tmu_set_irq(TMU0,1);
+	return 0;
 }
 
 static int tmu_timer_stop(void)
 {
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~0x3, TMU_012_TSTR);
+	_tmu_stop(TMU0);
+	_tmu_stop(TMU1);
+	_tmu_clear_status(TMU0);
 	return 0;
 }
 
+/*
+ * Set to 1 by tmu_clk_recalc() when the TMU0 clock rate drops; the TMU1
+ * count is then shifted left so it appears to keep the same frequency.
+ */
+static int tmus_are_scaled;
+
 static cycle_t tmu_timer_read(void)
 {
-	return ~ctrl_inl(TMU1_TCNT);
+	return ((cycle_t)(~_tmu_read(TMU1)))<<tmus_are_scaled;
+}
+
+
+static unsigned long tmu_latest_interval[3];
+static void tmu_timer_set_interval(int tmu_num, unsigned long interval, unsigned int reload)
+{
+	unsigned long tmu_tcnt = TMU0_TCNT + tmu_num*0xC;
+	unsigned long tmu_tcor = TMU0_TCOR + tmu_num*0xC;
+
+	_tmu_stop(tmu_num);
+
+	ctrl_outl(interval, tmu_tcnt);
+	tmu_latest_interval[tmu_num] = interval;
+
+	/*
+	 * TCNT reloads from TCOR on underflow, clear it if we don't
+	 * intend to auto-reload
+	 */
+	ctrl_outl( reload ? interval : 0 , tmu_tcor);
+
+	_tmu_start(tmu_num);
 }
 
 static int tmu_set_next_event(unsigned long cycles,
 			      struct clock_event_device *evt)
 {
-	tmu0_timer_set_interval(cycles, 1);
+	tmu_timer_set_interval(TMU0,cycles, evt->mode == CLOCK_EVT_MODE_PERIODIC);
+	_tmu_set_irq(TMU0,1);
 	return 0;
 }
 
@@ -96,12 +143,8 @@ static struct clock_event_device tmu0_clockevent = {
 static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
 {
 	struct clock_event_device *evt = &tmu0_clockevent;
-	unsigned long timer_status;
-
-	/* Clear UNF bit */
-	timer_status = ctrl_inw(TMU0_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU0_TCR);
+	_tmu_clear_status(TMU0);
+	_tmu_set_irq(TMU0,tmu0_clockevent.mode != CLOCK_EVT_MODE_ONESHOT);
 
 	evt->event_handler(evt);
 
@@ -109,56 +152,73 @@ static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
 }
 
 static struct irqaction tmu0_irq = {
-	.name		= "periodic timer",
+	.name		= "periodic/oneshot timer",
 	.handler	= tmu_timer_interrupt,
 	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
 	.mask		= CPU_MASK_NONE,
 };
 
-static void tmu0_clk_init(struct clk *clk)
+static void __init tmu_clk_init(struct clk *clk)
 {
-	u8 divisor = TMU_TCR_INIT & 0x7;
-	ctrl_outw(TMU_TCR_INIT, TMU0_TCR);
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
+	u8 divisor  = TMU_TCR_INIT & 0x7;
+	int tmu_num = clk->name[3]-'0';
+	ctrl_outw(TMU_TCR_INIT, TMU0_TCR+(tmu_num*0xC));
+	clk->rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 }
 
-static void tmu0_clk_recalc(struct clk *clk)
+static void tmu_clk_recalc(struct clk *clk)
 {
-	u8 divisor = ctrl_inw(TMU0_TCR) & 0x7;
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
-}
+	int tmu_num = clk->name[3]-'0';
+	unsigned long prev_rate = clk_get_rate(clk);
+	unsigned long flags;
+	u8 divisor = ctrl_inw(TMU0_TCR+tmu_num*0xC) & 0x7;
+	clk->rate  = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 
-static struct clk_ops tmu0_clk_ops = {
-	.init		= tmu0_clk_init,
-	.recalc		= tmu0_clk_recalc,
-};
+	if(prev_rate==clk_get_rate(clk))
+		return;
 
-static struct clk tmu0_clk = {
-	.name		= "tmu0_clk",
-	.ops		= &tmu0_clk_ops,
-};
+	if(tmu_num)
+		return; /* No more work on TMU1 */
 
-static void tmu1_clk_init(struct clk *clk)
-{
-	u8 divisor = TMU_TCR_INIT & 0x7;
-	ctrl_outw(divisor, TMU1_TCR);
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
-}
+	local_irq_save(flags);
+	tmus_are_scaled = (prev_rate > clk->rate);
 
-static void tmu1_clk_recalc(struct clk *clk)
-{
-	u8 divisor = ctrl_inw(TMU1_TCR) & 0x7;
-	clk->rate = clk->parent->rate / (4 << (divisor << 1));
+	_tmu_stop(TMU0);
+
+	tmu0_clockevent.mult = div_sc(clk->rate, NSEC_PER_SEC,
+				tmu0_clockevent.shift);
+	tmu0_clockevent.max_delta_ns =
+			clockevent_delta2ns(-1, &tmu0_clockevent);
+	tmu0_clockevent.min_delta_ns =
+			clockevent_delta2ns(1, &tmu0_clockevent);
+
+	if (tmus_are_scaled)
+		tmu_latest_interval[TMU0] >>= 1;
+	else
+		tmu_latest_interval[TMU0] <<= 1;
+
+	tmu_timer_set_interval(TMU0,
+		tmu_latest_interval[TMU0],
+		tmu0_clockevent.mode == CLOCK_EVT_MODE_PERIODIC);
+
+	_tmu_start(TMU0);
+
+	local_irq_restore(flags);
 }
 
-static struct clk_ops tmu1_clk_ops = {
-	.init		= tmu1_clk_init,
-	.recalc		= tmu1_clk_recalc,
+static struct clk_ops tmu_clk_ops = {
+	.init		= tmu_clk_init,
+	.recalc		= tmu_clk_recalc,
+};
+
+static struct clk tmu0_clk = {
+	.name		= "tmu0_clk",
+	.ops		= &tmu_clk_ops,
 };
 
 static struct clk tmu1_clk = {
 	.name		= "tmu1_clk",
-	.ops		= &tmu1_clk_ops,
+	.ops		= &tmu_clk_ops,
 };
 
 static int tmu_timer_init(void)
@@ -189,11 +249,12 @@ static int tmu_timer_init(void)
 	frequency = clk_get_rate(&tmu0_clk);
 	interval = (frequency + HZ / 2) / HZ;
 
-	sh_hpt_frequency = clk_get_rate(&tmu1_clk);
-	ctrl_outl(~0, TMU1_TCNT);
-	ctrl_outl(~0, TMU1_TCOR);
+	tmu_timer_set_interval(TMU0,interval, 1);
+	tmu_timer_set_interval(TMU1,~0,1);
 
-	tmu0_timer_set_interval(interval, 1);
+	_tmu_start(TMU1);
+
+	sh_hpt_frequency = clk_get_rate(&tmu1_clk);
 
 	tmu0_clockevent.mult = div_sc(frequency, NSEC_PER_SEC,
 				      tmu0_clockevent.shift);
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 511a9426cec..b359b08a8e3 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -26,6 +26,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/kprobes.h>
 
 #ifdef CONFIG_SH_KGDB
 #include <asm/kgdb.h>
@@ -192,6 +193,7 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	int ret, index, count;
 	unsigned long *rm, *rn;
 	unsigned char *src, *dst;
+	unsigned char __user *srcu, *dstu;
 
 	index = (instruction>>8)&15;	/* 0x0F00 */
 	rn = &regs->regs[index];
@@ -206,28 +208,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	case 0: /* mov.[bwl] to/from memory via r0+rn */
 		if (instruction & 8) {
 			/* from memory */
-			src = (unsigned char*) *rm;
-			src += regs->regs[0];
-			dst = (unsigned char*) rn;
-			*(unsigned long*)dst = 0;
+			srcu = (unsigned char __user *)*rm;
+			srcu += regs->regs[0];
+			dst = (unsigned char *)rn;
+			*(unsigned long *)dst = 0;
 
 #if !defined(__LITTLE_ENDIAN__)
 			dst += 4-count;
 #endif
-			if (ma->from(dst, src, count))
+			if (ma->from(dst, srcu, count))
 				goto fetch_fault;
 
 			sign_extend(count, dst);
 		} else {
 			/* to memory */
-			src = (unsigned char*) rm;
+			src = (unsigned char *)rm;
 #if !defined(__LITTLE_ENDIAN__)
 			src += 4-count;
 #endif
-			dst = (unsigned char*) *rn;
-			dst += regs->regs[0];
+			dstu = (unsigned char __user *)*rn;
+			dstu += regs->regs[0];
 
-			if (ma->to(dst, src, count))
+			if (ma->to(dstu, src, count))
 				goto fetch_fault;
 		}
 		ret = 0;
@@ -235,10 +237,10 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 
 	case 1: /* mov.l Rm,@(disp,Rn) */
 		src = (unsigned char*) rm;
-		dst = (unsigned char*) *rn;
-		dst += (instruction&0x000F)<<2;
+		dstu = (unsigned char __user *)*rn;
+		dstu += (instruction&0x000F)<<2;
 
-		if (ma->to(dst, src, 4))
+		if (ma->to(dstu, src, 4))
 			goto fetch_fault;
 		ret = 0;
 		break;
@@ -247,28 +249,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 		if (instruction & 4)
 			*rn -= count;
 		src = (unsigned char*) rm;
-		dst = (unsigned char*) *rn;
+		dstu = (unsigned char __user *)*rn;
 #if !defined(__LITTLE_ENDIAN__)
 		src += 4-count;
 #endif
-		if (ma->to(dst, src, count))
+		if (ma->to(dstu, src, count))
 			goto fetch_fault;
 		ret = 0;
 		break;
 
 	case 5: /* mov.l @(disp,Rm),Rn */
-		src = (unsigned char*) *rm;
-		src += (instruction&0x000F)<<2;
-		dst = (unsigned char*) rn;
-		*(unsigned long*)dst = 0;
+		srcu = (unsigned char __user *)*rm;
+		srcu += (instruction & 0x000F) << 2;
+		dst = (unsigned char *)rn;
+		*(unsigned long *)dst = 0;
 
-		if (ma->from(dst, src, 4))
+		if (ma->from(dst, srcu, 4))
 			goto fetch_fault;
 		ret = 0;
 		break;
 
 	case 6:	/* mov.[bwl] from memory, possibly with post-increment */
-		src = (unsigned char*) *rm;
+		srcu = (unsigned char __user *)*rm;
 		if (instruction & 4)
 			*rm += count;
 		dst = (unsigned char*) rn;
@@ -277,7 +279,7 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 #if !defined(__LITTLE_ENDIAN__)
 		dst += 4-count;
 #endif
-		if (ma->from(dst, src, count))
+		if (ma->from(dst, srcu, count))
 			goto fetch_fault;
 		sign_extend(count, dst);
 		ret = 0;
@@ -286,28 +288,28 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
 	case 8:
 		switch ((instruction&0xFF00)>>8) {
 		case 0x81: /* mov.w R0,@(disp,Rn) */
-			src = (unsigned char*) &regs->regs[0];
+			src = (unsigned char *) &regs->regs[0];
 #if !defined(__LITTLE_ENDIAN__)
 			src += 2;
 #endif
-			dst = (unsigned char*) *rm; /* called Rn in the spec */
-			dst += (instruction&0x000F)<<1;
+			dstu = (unsigned char __user *)*rm; /* called Rn in the spec */
+			dstu += (instruction & 0x000F) << 1;
 
-			if (ma->to(dst, src, 2))
+			if (ma->to(dstu, src, 2))
 				goto fetch_fault;
 			ret = 0;
 			break;
 
 		case 0x85: /* mov.w @(disp,Rm),R0 */
-			src = (unsigned char*) *rm;
-			src += (instruction&0x000F)<<1;
-			dst = (unsigned char*) &regs->regs[0];
-			*(unsigned long*)dst = 0;
+			srcu = (unsigned char __user *)*rm;
+			srcu += (instruction & 0x000F) << 1;
+			dst = (unsigned char *) &regs->regs[0];
+			*(unsigned long *)dst = 0;
 
 #if !defined(__LITTLE_ENDIAN__)
 			dst += 2;
 #endif
-			if (ma->from(dst, src, 2))
+			if (ma->from(dst, srcu, 2))
 				goto fetch_fault;
 			sign_extend(2, dst);
 			ret = 0;
@@ -333,7 +335,8 @@ static inline int handle_delayslot(struct pt_regs *regs,
 				   struct mem_access *ma)
 {
 	opcode_t instruction;
-	void *addr = (void *)(regs->pc + instruction_size(old_instruction));
+	void __user *addr = (void __user *)(regs->pc +
+		instruction_size(old_instruction));
 
 	if (copy_from_user(&instruction, addr, sizeof(instruction))) {
 		/* the instruction-fetch faulted */
@@ -511,14 +514,6 @@ int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
 	return ret;
 }
 
-#ifdef CONFIG_CPU_HAS_SR_RB
-#define lookup_exception_vector(x)	\
-	__asm__ __volatile__ ("stc r2_bank, %0\n\t" : "=r" ((x)))
-#else
-#define lookup_exception_vector(x)	\
-	__asm__ __volatile__ ("mov r4, %0\n\t" : "=r" ((x)))
-#endif
-
 /*
  * Handle various address error exceptions:
  *  - instruction address error:
@@ -542,7 +537,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 
 	/* Intentional ifdef */
 #ifdef CONFIG_CPU_HAS_SR_RB
-	lookup_exception_vector(error_code);
+	error_code = lookup_exception_vector();
 #endif
 
 	oldfs = get_fs();
@@ -559,7 +554,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 		}
 
 		set_fs(USER_DS);
-		if (copy_from_user(&instruction, (void *)(regs->pc),
+		if (copy_from_user(&instruction, (void __user *)(regs->pc),
 				   sizeof(instruction))) {
 			/* Argh. Fault on the instruction itself.
 			   This should never happen non-SMP
@@ -589,7 +584,7 @@ uspace_segv:
 			die("unaligned program counter", regs, error_code);
 
 		set_fs(KERNEL_DS);
-		if (copy_from_user(&instruction, (void *)(regs->pc),
+		if (copy_from_user(&instruction, (void __user *)(regs->pc),
 				   sizeof(instruction))) {
 			/* Argh. Fault on the instruction itself.
 			   This should never happen non-SMP
@@ -683,7 +678,7 @@ asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5,
 	}
 #endif
 
-	lookup_exception_vector(error_code);
+	error_code = lookup_exception_vector();
 
 	local_irq_enable();
 	CHK_REMOTE_DEBUG(regs);
@@ -739,11 +734,13 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 				struct pt_regs __regs)
 {
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	unsigned long error_code;
+	unsigned long inst;
 	struct task_struct *tsk = current;
-#ifdef CONFIG_SH_FPU_EMU
-	unsigned short inst = 0;
 
+	if (kprobe_handle_illslot(regs->pc) == 0)
+		return;
+
+#ifdef CONFIG_SH_FPU_EMU
 	get_user(inst, (unsigned short *)regs->pc + 1);
 	if (!do_fpu_inst(inst, regs)) {
 		get_user(inst, (unsigned short *)regs->pc);
@@ -754,12 +751,12 @@ asmlinkage void do_illegal_slot_inst(unsigned long r4, unsigned long r5,
 	/* not a FPU inst. */
 #endif
 
-	lookup_exception_vector(error_code);
+	inst = lookup_exception_vector();
 
 	local_irq_enable();
 	CHK_REMOTE_DEBUG(regs);
 	force_sig(SIGILL, tsk);
-	die_if_no_fixup("illegal slot instruction", regs, error_code);
+	die_if_no_fixup("illegal slot instruction", regs, inst);
 }
 
 asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
@@ -769,7 +766,7 @@ asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	long ex;
 
-	lookup_exception_vector(ex);
+	ex = lookup_exception_vector();
 	die_if_kernel("exception", regs, ex);
 }
 
diff --git a/arch/sh/lib/div64-generic.c b/arch/sh/lib/div64-generic.c
index 4bef3b5d964..60e76aa8b53 100644
--- a/arch/sh/lib/div64-generic.c
+++ b/arch/sh/lib/div64-generic.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/types.h>
+#include <asm/div64.h>
 
 extern uint64_t __xdiv64_32(u64 n, u32 d);
 
diff --git a/arch/sh/lib/io.c b/arch/sh/lib/io.c
index 4f54ec43516..88dfe6e396b 100644
--- a/arch/sh/lib/io.c
+++ b/arch/sh/lib/io.c
@@ -14,12 +14,12 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
-void __raw_readsl(unsigned long addr, void *datap, int len)
+void __raw_readsl(const void __iomem *addr, void *datap, int len)
 {
 	u32 *data;
 
 	for (data = datap; (len != 0) && (((u32)data & 0x1f) != 0); len--)
-		*data++ = ctrl_inl(addr);
+		*data++ = __raw_readl(addr);
 
 	if (likely(len >= (0x20 >> 2))) {
 		int tmp2, tmp3, tmp4, tmp5, tmp6;
@@ -59,11 +59,11 @@ void __raw_readsl(unsigned long addr, void *datap, int len)
 	}
 
 	for (; len != 0; len--)
-		*data++ = ctrl_inl(addr);
+		*data++ = __raw_readl(addr);
 }
 EXPORT_SYMBOL(__raw_readsl);
 
-void __raw_writesl(unsigned long addr, const void *data, int len)
+void __raw_writesl(void __iomem *addr, const void *data, int len)
 {
 	if (likely(len != 0)) {
 		int tmp1;
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 8a03926ea84..555ec9714b9 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -132,7 +132,11 @@ config ARCH_SELECT_MEMORY_MODEL
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
-	depends on SPARSEMEM
+	depends on SPARSEMEM && MMU
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+	depends on SPARSEMEM && MMU
 
 config ARCH_MEMORY_PROBE
 	def_bool y
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 0e189ccd4a7..5ba067b2659 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -130,12 +130,18 @@ static int __init cache_debugfs_init(void)
 	dcache_dentry = debugfs_create_file("dcache", S_IRUSR, sh_debugfs_root,
 					    (unsigned int *)CACHE_TYPE_DCACHE,
 					    &cache_debugfs_fops);
+	if (!dcache_dentry)
+		return -ENOMEM;
 	if (IS_ERR(dcache_dentry))
 		return PTR_ERR(dcache_dentry);
 
 	icache_dentry = debugfs_create_file("icache", S_IRUSR, sh_debugfs_root,
 					    (unsigned int *)CACHE_TYPE_ICACHE,
 					    &cache_debugfs_fops);
+	if (!icache_dentry) {
+		debugfs_remove(dcache_dentry);
+		return -ENOMEM;
+	}
 	if (IS_ERR(icache_dentry)) {
 		debugfs_remove(dcache_dentry);
 		return PTR_ERR(icache_dentry);
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 1fdc8d90254..5cfe08dbb59 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -261,7 +261,7 @@ void flush_dcache_page(struct page *page)
 }
 
 /* TODO: Selective icache invalidation through IC address array.. */
-static inline void __uses_jump_to_uncached flush_icache_all(void)
+static void __uses_jump_to_uncached flush_icache_all(void)
 {
 	unsigned long flags, ccr;
 
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 64b8f7f96f9..9f8ea3ada4d 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -16,14 +16,6 @@
 #include <asm/addrspace.h>
 #include <asm/io.h>
 
-struct dma_coherent_mem {
-	void		*virt_base;
-	u32		device_base;
-	int		size;
-	int		flags;
-	unsigned long	*bitmap;
-};
-
 void *dma_alloc_coherent(struct device *dev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t gfp)
 {
@@ -44,7 +36,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 	 */
 	dma_cache_sync(dev, ret, size, DMA_BIDIRECTIONAL);
 
-	ret_nocache = ioremap_nocache(virt_to_phys(ret), size);
+	ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
 	if (!ret_nocache) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
@@ -58,12 +50,10 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 
 	if (!dma_release_from_coherent(dev, order, vaddr)) {
 		WARN_ON(irqs_disabled());	/* for portability */
-		BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE);
 		free_pages((unsigned long)phys_to_virt(dma_handle), order);
 		iounmap(vaddr);
 	}
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 0c776fdfbdd..898d477e47c 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -2,7 +2,7 @@
  * Page fault handler for SH with an MMU.
  *
  *  Copyright (C) 1999  Niibe Yutaka
- *  Copyright (C) 2003 - 2007  Paul Mundt
+ *  Copyright (C) 2003 - 2008  Paul Mundt
  *
  *  Based on linux/arch/i386/mm/fault.c:
  *   Copyright (C) 1995  Linus Torvalds
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
+#include <linux/marker.h>
 #include <asm/io_trapped.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
@@ -37,10 +38,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	int fault;
 	siginfo_t info;
 
-#ifdef CONFIG_SH_KGDB
-	if (kgdb_nofault && kgdb_bus_err_hook)
-		kgdb_bus_err_hook();
-#endif
+	/*
+	 * We don't bother with any notifier callbacks here, as they are
+	 * all handled through the __do_page_fault() fast-path.
+	 */
 
 	tsk = current;
 	si_code = SEGV_MAPERR;
@@ -61,7 +62,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		pgd = get_TTB() + offset;
 		pgd_k = swapper_pg_dir + offset;
 
-		/* This will never happen with the folded page table. */
 		if (!pgd_present(*pgd)) {
 			if (!pgd_present(*pgd_k))
 				goto bad_area_nosemaphore;
@@ -71,9 +71,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
 		pud = pud_offset(pgd, address);
 		pud_k = pud_offset(pgd_k, address);
-		if (pud_present(*pud) || !pud_present(*pud_k))
-			goto bad_area_nosemaphore;
-		set_pud(pud, *pud_k);
+
+		if (!pud_present(*pud)) {
+			if (!pud_present(*pud_k))
+				goto bad_area_nosemaphore;
+			set_pud(pud, *pud_k);
+			return;
+		}
 
 		pmd = pmd_offset(pud, address);
 		pmd_k = pmd_offset(pud_k, address);
@@ -242,6 +246,25 @@ do_sigbus:
 		goto no_context;
 }
 
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	trace_mark(kernel_arch_trap_entry, "trap_id %d ip #p%ld",
+		   trap >> 5, instruction_pointer(regs));
+
+#ifdef CONFIG_KPROBES
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+#endif
+
+	return ret;
+}
+
 #ifdef CONFIG_SH_STORE_QUEUES
 /*
  * This is a special case for the SH-4 store queues, as pages for this
@@ -265,12 +288,18 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	int ret = 0;
+
+	if (notify_page_fault(regs, lookup_exception_vector()))
+		goto out;
 
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
 		kgdb_bus_err_hook();
 #endif
 
+	ret = 1;
+
 	/*
 	 * We don't take page faults for P1, P2, and parts of P4, these
 	 * are always mapped, whether it be due to legacy behaviour in
@@ -280,24 +309,23 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 		pgd = pgd_offset_k(address);
 	} else {
 		if (unlikely(address >= TASK_SIZE || !current->mm))
-			return 1;
+			goto out;
 
 		pgd = pgd_offset(current->mm, address);
 	}
 
 	pud = pud_offset(pgd, address);
 	if (pud_none_or_clear_bad(pud))
-		return 1;
+		goto out;
 	pmd = pmd_offset(pud, address);
 	if (pmd_none_or_clear_bad(pmd))
-		return 1;
-
+		goto out;
 	pte = pte_offset_kernel(pmd, address);
 	entry = *pte;
 	if (unlikely(pte_none(entry) || pte_not_present(entry)))
-		return 1;
+		goto out;
 	if (unlikely(writeaccess && !pte_write(entry)))
-		return 1;
+		goto out;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
@@ -314,5 +342,8 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
 
-	return 0;
+	ret = 0;
+out:
+	trace_mark(kernel_arch_trap_exit, MARK_NOARGS);
+	return ret;
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index b75a7acd62f..2a53943924b 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -23,7 +23,19 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
-unsigned long cached_to_uncached = 0;
+
+#ifdef CONFIG_SUPERH32
+/*
+ * Handle trivial transitions between cached and uncached
+ * segments, making use of the 1:1 mapping relationship in
+ * 512MB lowmem.
+ *
+ * This is the offset of the uncached section from its cached alias.
+ * The default value is only valid in 29-bit mode; in 32-bit mode it
+ * will be overridden in pmb_init().
+ */
+unsigned long cached_to_uncached = P2SEG - P1SEG;
+#endif
 
 #ifdef CONFIG_MMU
 static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
@@ -58,9 +70,7 @@ static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
 	}
 
 	set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
-
-	if (cached_to_uncached)
-		flush_tlb_one(get_asid(), addr);
+	flush_tlb_one(get_asid(), addr);
 }
 
 /*
@@ -113,7 +123,6 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 		if (!pmd_present(*pmd)) {
 			pte_t *pte_table;
 			pte_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-			memset(pte_table, 0, PAGE_SIZE);
 			pmd_populate_kernel(&init_mm, pmd, pte_table);
 		}
 
@@ -165,15 +174,6 @@ void __init paging_init(void)
 #ifdef CONFIG_SUPERH32
 	/* Set up the uncached fixmap */
 	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
-
-#ifdef CONFIG_29BIT
-	/*
-	 * Handle trivial transitions between cached and uncached
-	 * segments, making use of the 1:1 mapping relationship in
-	 * 512MB lowmem.
-	 */
-	cached_to_uncached = P2SEG - P1SEG;
-#endif
 #endif
 }
 
@@ -265,6 +265,35 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
+#if THREAD_SHIFT < PAGE_SHIFT
+static struct kmem_cache *thread_info_cache;
+
+struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+	struct thread_info *ti;
+
+	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
+	if (unlikely(ti == NULL))
+		return NULL;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	memset(ti, 0, THREAD_SIZE);
+#endif
+	return ti;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+					      THREAD_SIZE, 0, NULL);
+	BUG_ON(thread_info_cache == NULL);
+}
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
@@ -292,4 +321,21 @@ int memory_add_physaddr_to_nid(u64 addr)
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = start_pfn + (size >> PAGE_SHIFT);
+	int ret;
+
+	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
+	if (unlikely(ret))
+		printk("%s: Failed, offline_pages() == %d\n", __func__, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(remove_memory);
 #endif
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
index 677dd57f087..91ed4e695ff 100644
--- a/arch/sh/mm/pg-nommu.c
+++ b/arch/sh/mm/pg-nommu.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <asm/page.h>
+#include <asm/uaccess.h>
 
 void copy_page(void *to, void *from)
 {
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index cef727669c8..84241676265 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -394,6 +394,8 @@ static int __init pmb_debugfs_init(void)
 
 	dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
 				     sh_debugfs_root, NULL, &pmb_debugfs_fops);
+	if (!dentry)
+		return -ENOMEM;
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/tlb-nommu.c
index 15111bc7ddd..71c742b5aee 100644
--- a/arch/sh/mm/tlb-nommu.c
+++ b/arch/sh/mm/tlb-nommu.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 /*
  * Nothing too terribly exciting here ..
diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types
index 0a11cc08f0a..d4fb11f7e2e 100644
--- a/arch/sh/tools/mach-types
+++ b/arch/sh/tools/mach-types
@@ -30,6 +30,7 @@ HP6XX			SH_HP6XX
 DREAMCAST		SH_DREAMCAST
 SNAPGEAR		SH_SECUREEDGE5410
 EDOSK7705		SH_EDOSK7705
+EDOSK7760		SH_EDOSK7760
 SH4202_MICRODEV		SH_SH4202_MICRODEV
 SH03			SH_SH03
 LANDISK			SH_LANDISK
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 97671dac12a..e594559c8db 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General machine setup"
 
 config SMP
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 29899fd5b1b..80fe547c3f4 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -135,6 +135,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define TIF_POLLING_NRFLAG	9	/* true if poll_idle() is polling
 					 * TIF_NEED_RESCHED */
 #define TIF_MEMDIE		10
+#define TIF_FREEZE		11	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -148,6 +149,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
 					 _TIF_SIGPENDING | \
 					 _TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index c0a737d7292..639ac805448 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TIF_ABI_PENDING		12
 #define TIF_MEMDIE		13
 #define TIF_POLLING_NRFLAG	14
+#define TIF_FREEZE		15	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -249,6 +250,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 _TIF_DO_NOTIFY_RESUME_MASK | \
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 5446e2a499b..035b15af90d 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -96,6 +96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 	def_bool y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 6976812cfb1..393bccfe178 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -229,6 +229,8 @@ endmenu
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "drivers/block/Kconfig"
 
 source "arch/um/Kconfig.char"
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index fd0c25ad6af..129647375a6 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -179,7 +179,8 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
 
-		err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0],
+		err = copy_from_user(&fpx,
+			&((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
 				     sizeof(struct user_fxsr_struct));
 		if (err)
 			return 1;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd3c2c53873..5b9b12321ad 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,7 @@ config X86
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
@@ -193,6 +194,7 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
@@ -1241,14 +1243,6 @@ config EFI
   	resultant kernel should continue to boot on existing non-EFI
   	platforms.
 
-config IRQBALANCE
-	def_bool y
-	prompt "Enable kernel irq balancing"
-	depends on X86_32 && SMP && X86_IO_APIC
-	help
-	  The default yes will allow the kernel to do irq load balancing.
-	  Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d..13b8c86ae98 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc..d7e5a58ee22 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y			+= traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o
+	obj-y				+= bios_uv.o uv_irq.o uv_sysfs.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc736..0d1c26a583c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63..c44cd6dbfa1 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
+#include <asm/desc.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
 	header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
 	stack_start.sp = temp_stack + 4096;
+	early_gdt_descr.address =
+			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c
index 21c831d96af..04a7f960bbc 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic.c
@@ -23,11 +23,13 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -36,8 +38,14 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -50,16 +58,58 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
 {
 	return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	}
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+	const long pm_thresh = pm_100ms / 100;
+	unsigned long mult;
+	u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+	return -1;
+#endif
+
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	/* Check, if the PM timer is available */
+	if (!deltapm)
+		return -1;
+
+	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+	if (deltapm > (pm_100ms - pm_thresh) &&
+	    deltapm < (pm_100ms + pm_thresh)) {
+		apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+	} else {
+		res = (((u64)deltapm) *  mult) >> 22;
+		do_div(res, 1000000);
+		printk(KERN_WARNING "APIC calibration not consistent "
+			"with PM Timer: %ldms instead of 100ms\n",
+			(long)res);
+		/* Correct the lapic counter value */
+		res = (((u64)(*delta)) * pm_100ms);
+		do_div(res, deltapm);
+		printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			"%lu (%ld)\n", (unsigned long)res, *delta);
+		*delta = (long)res;
+	}
+
+	return 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
 	void (*real_handler)(struct clock_event_device *dev);
 	unsigned long deltaj;
-	long delta, deltapm;
+	long delta;
 	int pm_referenced = 0;
 
 	local_irq_disable();
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
 	global_clock_event->event_handler = lapic_cal_handler;
 
 	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * Setup the APIC counter to maximum. There is no way the lapic
 	 * can underflow in the 100ms detection time frame
 	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
+	__setup_APIC_LVTT(0xffffffff, 0, 0);
 
 	/* Let the interrupts run */
 	local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
+	/* we trust the PM based calibration if possible */
+	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+					&delta);
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
 
 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-	/* We trust the pm timer based calibration */
+	/*
+	 * PM timer calibration failed or not turned on
+	 * so lets try APIC timer based calibration
+	 */
 	if (!pm_referenced) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer();
 }
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
+	unsigned int oldvalue, value, maxlvt;
+
+	if (!lapic_is_integrated()) {
+		printk(KERN_INFO "No ESR for 82489DX.\n");
+		return;
+	}
 
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
+	if (esr_disable) {
 		/*
-		 * spec says clear errors after enabling vector.
+		 * Something untraceable is creating bad interrupts on
+		 * secondary quads ... for the moment, just leave the
+		 * ESR disabled - we can't do anything useful with the
+		 * errors anyway - mbligh
 		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
+		printk(KERN_INFO "Leaving ESR disabled.\n");
+		return;
 	}
+
+	maxlvt = lapic_get_maxlvt();
+	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	oldvalue = apic_read(APIC_ESR);
+
+	/* enables sending errors */
+	value = ERROR_APIC_VECTOR;
+	apic_write(APIC_LVTERR, value);
+
+	/*
+	 * spec says clear errors after enabling vector.
+	 */
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+	value = apic_read(APIC_ESR);
+	if (value != oldvalue)
+		apic_printk(APIC_VERBOSE, "ESR value before enabling "
+			"vector: 0x%08x  after: 0x%08x\n",
+			oldvalue, value);
 }
 
 
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned long value, integrated;
+	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
+	if (lapic_is_integrated() && esr_disable) {
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 	}
+#endif
 
-	integrated = lapic_is_integrated();
+	preempt_disable();
 
 	/*
 	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
 	if (!apic_id_registered())
-		WARN_ON_ONCE(1);
+		BUG();
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
 	 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
 	 * See also the comment in end_level_ioapic_irq().  --macro
 	 */
 
-	/* Enable focus processor (bit==0) */
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
 	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!integrated)		/* 82489DX */
+	if (!lapic_is_integrated())		/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
+	preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+
+	ret = save_mask_IO_APIC_setup();
+	if (ret) {
+		printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+		goto end;
+	}
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end_restore;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+
+end_restore:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+#endif /* HAVE_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_physical_apicid = 0;
+	return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1255,12 +1515,46 @@ no_apic:
 	printk(KERN_INFO "No local APIC present or hardware disabled\n");
 	return -1;
 }
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long phys_addr;
+
+	/*
+	 * If no local APIC can be found then go out
+	 * : it means there is no mpatable and MADT
+	 */
+	if (!smp_found_config)
+		return;
+
+	phys_addr = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+		    APIC_BASE, phys_addr);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+#endif
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
 		apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-	       apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+				APIC_BASE, apic_phys);
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
 	 */
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
 
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
 	 */
 	if (!cpu_has_apic &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
 		       boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
+#endif
 
-	verify_local_APIC();
+#ifdef HAVE_X2APIC
+	enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+	setup_apic_routing();
+#endif
 
+	verify_local_APIC();
 	connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
 	/*
 	 * Hack: In case of kdump, after a crash, kernel might be booting
 	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
 	boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config)
-		if (!skip_ioapic_setup && nr_ioapics)
-			setup_IO_APIC();
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+# ifdef CONFIG_X86_64
+	else
+		nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+#else
 	setup_boot_clock();
+#endif
 
 	return 0;
 }
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
  */
 void smp_spurious_interrupt(struct pt_regs *regs)
 {
-	unsigned long v;
+	u32 v;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+	add_pda(irq_spurious_count, 1);
+#else
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
 	       "should never happen.\n", smp_processor_id());
 	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	unsigned long v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
 	if (x2apic)
 		enable_x2apic();
 	else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
 	.cls	= &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
- * APIC command line parameters
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
  */
-static int __init parse_lapic(char *arg)
+__cpuinit int apic_is_clustered_box(void)
 {
-	force_enable_local_apic = 1;
-	return 0;
+	int i, clusters, zeros;
+	unsigned id;
+	u16 *bios_cpu_apicid;
+	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+	/*
+	 * there is not this kind of box with AMD CPU yet.
+	 * Some AMD box with quadcore cpu and 8 sockets apicid
+	 * will be [4, 0x23] or [8, 0x27] could be thought to
+	 * vsmp box still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		/* are we being called early in kernel startup? */
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
+
+		if (id != BAD_APICID)
+			__set_bit(APIC_CLUSTERID(id), clustermap);
+	}
+
+	/* Problem:  Partially populated chassis may not have CPUs in some of
+	 * the APIC clusters they have been allocated.  Only present CPUs have
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
+	 */
+	clusters = 0;
+	zeros = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (test_bit(i, clustermap)) {
+			clusters += 1 + zeros;
+			zeros = 0;
+		} else
+			++zeros;
+	}
+
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
+	/*
+	 * If clusters > 2, then should be multi-chassis.
+	 * May have to revisit this when multi-core + hyperthreaded CPUs come
+	 * out, but AFAIK this will work even for them.
+	 */
+	return (clusters > 2);
 }
-early_param("lapic", parse_lapic);
+#endif
 
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
 	disable_apic = 1;
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
 	if (!arg)  {
 #ifdef CONFIG_X86_64
 		skip_ioapic_setup = 0;
-		ioapic_force = 1;
 		return 0;
 #endif
 		return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 94ddb69ae15..00000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,1848 +0,0 @@
-/*
- *	Local APIC handling, local APIC timers
- *
- *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively.
- *	Maciej W. Rozycki	:	Various updates and fixes.
- *	Mikael Pettersson	:	Power Management for UP-APIC.
- *	Pavel Machek and
- *	Mikael Pettersson	:	PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
-int disable_x2apic;
-int x2apic;
-
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-	.name = "Local APIC",
-	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-	.name		= "lapic",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-	.shift		= 32,
-	.set_mode	= lapic_timer_setup,
-	.set_next_event	= lapic_next_event,
-	.broadcast	= lapic_timer_broadcast,
-	.rating		= 100,
-	.irq		= -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-unsigned long mp_lapic_addr;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-	return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-	return 1;
-#else
-	return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-	/* AMD systems use old APIC versions, so check the CPU */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 >= 0xf)
-		return 1;
-	return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-	u32 send_status;
-	int timeout;
-
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		udelay(100);
-	} while (timeout++ < 1000);
-
-	return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-	apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-	u32 icr1, icr2;
-
-	icr2 = apic_read(APIC_ICR2);
-	icr1 = apic_read(APIC_ICR);
-
-	return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-	.read = native_apic_mem_read,
-	.write = native_apic_mem_write,
-	.icr_read = xapic_icr_read,
-	.icr_write = xapic_icr_write,
-	.wait_icr_idle = xapic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-static void x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-	unsigned long val;
-
-	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-	return val;
-}
-
-static struct apic_ops x2apic_ops = {
-	.read = native_apic_msr_read,
-	.write = native_apic_msr_write,
-	.icr_read = x2apic_icr_read,
-	.icr_write = x2apic_icr_write,
-	.wait_icr_idle = x2apic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-	unsigned int v;
-
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-
-	/* Level triggered for 82489DX (32bit mode) */
-	if (!lapic_is_integrated())
-		v |= APIC_LVT_LEVEL_TRIGGER;
-
-	apic_write(APIC_LVT0, v);
-}
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
-	/*
-	 * - we always have APIC integrated on 64bit mode
-	 * - 82489DXs do not report # of LVT entries
-	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-	unsigned int lvtt_value, tmp_value;
-
-	lvtt_value = LOCAL_TIMER_VECTOR;
-	if (!oneshot)
-		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-	if (!lapic_is_integrated())
-		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-	if (!irqen)
-		lvtt_value |= APIC_LVT_MASKED;
-
-	apic_write(APIC_LVTT, lvtt_value);
-
-	/*
-	 * Divide PICLK by 16
-	 */
-	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR,
-		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-		APIC_TDR_DIV_16);
-
-	if (!oneshot)
-		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-	apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt)
-{
-	apic_write(APIC_TMICT, delta);
-	return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt)
-{
-	unsigned long flags;
-	unsigned int v;
-
-	/* Lapic used as dummy for broadcast ? */
-	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-		return;
-
-	local_irq_save(flags);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
-				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		v = apic_read(APIC_LVTT);
-		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do here */
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void setup_APIC_timer(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-	clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-	/*
-	 * The local apic timer can be disabled via the kernel
-	 * commandline or from the CPU detection code. Register the lapic
-	 * timer as a dummy clock event source on SMP systems, so the
-	 * broadcast mechanism is used. On UP systems simply ignore it.
-	 */
-	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1) {
-			lapic_clockevent.mult = 1;
-			setup_APIC_timer();
-		}
-		return;
-	}
-
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
-
-	if (calibrate_APIC_clock()) {
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
-			setup_APIC_timer();
-		return;
-	}
-
-	/*
-	 * If nmi_watchdog is set to IO_APIC, we need the
-	 * PIT/HPET going.  Otherwise register lapic as a dummy
-	 * device.
-	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-	/* Setup the lapic or request the broadcast */
-	setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-	setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-	/*
-	 * Normally we should not be here till LAPIC has been initialized but
-	 * in some cases like kdump, its possible that there is a pending LAPIC
-	 * timer interrupt from previous kernel's context and is delivered in
-	 * new kernel the moment interrupts are enabled.
-	 *
-	 * Interrupts are enabled early and LAPIC is setup much later, hence
-	 * its possible that when we get here evt->event_handler is NULL.
-	 * Check for event_handler being NULL and discard the interrupt as
-	 * spurious.
-	 */
-	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-		/* Switch it off */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-		return;
-	}
-
-	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-#ifdef CONFIG_X86_64
-	add_pda(apic_timer_irqs, 1);
-#else
-	per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-	evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 */
-	ack_APIC_irq();
-	/*
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-	exit_idle();
-	irq_enter();
-	local_apic_timer_interrupt();
-	irq_exit();
-
-	set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-	int maxlvt;
-	u32 v;
-
-	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
-		return;
-
-	maxlvt = lapic_get_maxlvt();
-	/*
-	 * Masking an LVT entry can trigger a local APIC error
-	 * if the vector is zero. Mask LVTERR first to prevent this.
-	 */
-	if (maxlvt >= 3) {
-		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-	}
-	/*
-	 * Careful: we have to set masks only first to deassert
-	 * any level-triggered sources.
-	 */
-	v = apic_read(APIC_LVTT);
-	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT1);
-	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-	if (maxlvt >= 4) {
-		v = apic_read(APIC_LVTPC);
-		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-	}
-
-	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
-	if (maxlvt >= 5) {
-		v = apic_read(APIC_LVTTHMR);
-		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-	}
-#endif
-	/*
-	 * Clean APIC state for other OSs:
-	 */
-	apic_write(APIC_LVTT, APIC_LVT_MASKED);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	apic_write(APIC_LVT1, APIC_LVT_MASKED);
-	if (maxlvt >= 3)
-		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-	/* Integrated APIC (!82489DX) ? */
-	if (lapic_is_integrated()) {
-		if (maxlvt > 3)
-			/* Clear ESR due to Pentium errata 3AP and 11AP */
-			apic_write(APIC_ESR, 0);
-		apic_read(APIC_ESR);
-	}
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-	unsigned int value;
-
-	clear_local_APIC();
-
-	/*
-	 * Disable APIC (implies clearing of registers
-	 * for 82489DX!).
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_SPIV_APIC_ENABLED;
-	apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
-	 * restore the disabled state.
-	 */
-	if (enabled_via_apicbase) {
-		unsigned int l, h;
-
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_ENABLE;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-	unsigned long flags;
-
-	if (!cpu_has_apic)
-		return;
-
-	local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-	if (!enabled_via_apicbase)
-		clear_local_APIC();
-	else
-#endif
-		disable_local_APIC();
-
-
-	local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ APIC_ID_MASK))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-	/*
-	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
-	 * needed on AMD.
-	 */
-	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-	unsigned int value;
-
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !cpu_has_apic)
-		return;
-
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
-
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
-#endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-	unsigned int value;
-	int i, j;
-
-	preempt_disable();
-	value = apic_read(APIC_LVR);
-
-	BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
-	/*
-	 * Double-check whether this APIC is really registered.
-	 * This is meaningless in clustered apic mode, so we skip it.
-	 */
-	if (!apic_id_registered())
-		BUG();
-
-	/*
-	 * Intel recommends to set DFR, LDR and TPR before enabling
-	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-	 * document number 292116).  So here it goes...
-	 */
-	init_apic_ldr();
-
-	/*
-	 * Set Task Priority to 'accept all'. We never change this
-	 * later on.
-	 */
-	value = apic_read(APIC_TASKPRI);
-	value &= ~APIC_TPRI_MASK;
-	apic_write(APIC_TASKPRI, value);
-
-	/*
-	 * After a crash, we no longer service the interrupts and a pending
-	 * interrupt from previous kernel might still have ISR bit set.
-	 *
-	 * Most probably by now CPU has serviced that pending interrupt and
-	 * it might not have done the ack_APIC_irq() because it thought,
-	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-	 * does not clear the ISR bit and cpu thinks it has already serivced
-	 * the interrupt. Hence a vector might get locked. It was noticed
-	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
-	 */
-	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-		value = apic_read(APIC_ISR + i*0x10);
-		for (j = 31; j >= 0; j--) {
-			if (value & (1<<j))
-				ack_APIC_irq();
-		}
-	}
-
-	/*
-	 * Now that we are all set up, enable the APIC
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	/*
-	 * Enable APIC
-	 */
-	value |= APIC_SPIV_APIC_ENABLED;
-
-	/* We always use processor focus */
-
-	/*
-	 * Set spurious IRQ vector
-	 */
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up LVT0, LVT1:
-	 *
-	 * set up through-local-APIC on the BP's LINT0. This is not
-	 * strictly necessary in pure symmetric-IO mode, but sometimes
-	 * we delegate interrupts to the 8259A.
-	 */
-	/*
-	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-	 */
-	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && !value) {
-		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-			    smp_processor_id());
-	} else {
-		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-			    smp_processor_id());
-	}
-	apic_write(APIC_LVT0, value);
-
-	/*
-	 * only the BP should see the LINT1 NMI signal, obviously.
-	 */
-	if (!smp_processor_id())
-		value = APIC_DM_NMI;
-	else
-		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	apic_write(APIC_LVT1, value);
-	preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-	lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int value;
-		/* Disable the local apic timer */
-		value = apic_read(APIC_LVTT);
-		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, value);
-	}
-#endif
-
-	setup_apic_nmi_watchdog(NULL);
-	apic_pm_activate();
-}
-
-void check_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-	if (msr & X2APIC_ENABLE) {
-		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
-		apic_ops = &x2apic_ops;
-	}
-}
-
-void enable_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-	if (!(msr & X2APIC_ENABLE)) {
-		printk("Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-	}
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	int ret;
-	unsigned long flags;
-
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of nox2apic\n");
-		return;
-	}
-
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of skipping io-apic setup\n");
-		return;
-	}
-
-	ret = dmar_table_init();
-	if (ret) {
-		printk(KERN_INFO
-		       "dmar_table_init() failed with %d:\n", ret);
-
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			printk(KERN_INFO
-			       "Not enabling x2apic,Intr-remapping\n");
-		return;
-	}
-
-	local_irq_save(flags);
-	mask_8259A();
-	save_mask_IO_APIC_setup();
-
-	ret = enable_intr_remapping(1);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
-
-	if (ret)
-		goto end;
-
-	if (!x2apic) {
-		x2apic = 1;
-		apic_ops = &x2apic_ops;
-		enable_x2apic();
-	}
-end:
-	if (ret)
-		/*
-		 * IR enabling failed
-		 */
-		restore_IO_APIC_setup();
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-	unmask_8259A();
-	local_irq_restore(flags);
-
-	if (!ret) {
-		if (!x2apic_preenabled)
-			printk(KERN_INFO
-			       "Enabled x2apic and interrupt-remapping\n");
-		else
-			printk(KERN_INFO
-			       "Enabled Interrupt-remapping\n");
-	} else
-		printk(KERN_ERR
-		       "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-	if (!cpu_has_x2apic)
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-	       " and x2apic\n");
-#endif
-
-	return;
-}
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
-		return -1;
-	}
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
-	return 0;
-}
-
-void __init early_init_lapic_mapping(void)
-{
-	unsigned long phys_addr;
-
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	phys_addr = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, phys_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-	if (x2apic) {
-		boot_cpu_physical_apicid = read_apic_id();
-		return;
-	}
-
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
-	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
-		apic_phys = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-				APIC_BASE, apic_phys);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
-		return -1;
-	}
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
-		return -1;
-	}
-
-	enable_IR_x2apic();
-	setup_apic_routing();
-
-	verify_local_APIC();
-
-	connect_bsp_APIC();
-
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-
-	setup_local_APIC();
-
-	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling vector on BP
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-
-	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-		localise_nmi_watchdog();
-	end_local_APIC_setup();
-
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else
-		nr_ioapics = 0;
-	setup_boot_APIC_clock();
-	check_nmi_watchdog();
-	return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
-	unsigned int v;
-	exit_idle();
-	irq_enter();
-	/*
-	 * Check if this really is a spurious interrupt and ACK it
-	 * if it is a vectored one.  Just in case...
-	 * Spurious interrupts should not be ACKed.
-	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
-
-	add_pda(irq_spurious_count, 1);
-	irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-asmlinkage void smp_error_interrupt(void)
-{
-	unsigned int v, v1;
-
-	exit_idle();
-	irq_enter();
-	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
-	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
-	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
-
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
-	irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Do not trust the local APIC being empty at bootup.
-		 */
-		clear_local_APIC();
-		/*
-		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-		 * local APIC to INT and NMI lines.
-		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
-	}
-#endif
-	enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:	indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	unsigned int value;
-
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Put the board back into PIC mode (has an effect only on
-		 * certain older boards).  Note that APIC interrupts, including
-		 * IPIs, won't work beyond this point!  The only exception are
-		 * INIT IPIs.
-		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
-		return;
-	}
-#endif
-
-	/* Go back to Virtual Wire compatibility mode */
-
-	/* For the spurious interrupt use vector F, and enable it */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-	value |= 0xf;
-	apic_write(APIC_SPIV, value);
-
-	if (!virt_wire_setup) {
-		/*
-		 * For LVT0 make it edge triggered, active high,
-		 * external and enabled
-		 */
-		value = apic_read(APIC_LVT0);
-		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
-	} else {
-		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	}
-
-	/*
-	 * For LVT1 make it edge triggered, active high,
-	 * nmi and enabled
-	 */
-	value = apic_read(APIC_LVT1);
-	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-	int cpu;
-	cpumask_t tmp_map;
-
-	/*
-	 * Validate version
-	 */
-	if (version == 0x0) {
-		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-				"fixing up to 0x10. (tell your hw vendor)\n",
-				version);
-		version = 0x10;
-	}
-	apic_version[apicid] = version;
-
-	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
-		return;
-	}
-
-	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-
-	physid_set(apicid, phys_cpu_present_map);
-	if (apicid == boot_cpu_physical_apicid) {
-		/*
-		 * x86_bios_cpu_apicid is required to have processors listed
-		 * in same order as logical cpu numbers. Hence the first
-		 * entry is BSP, and so on.
-		 */
-		cpu = 0;
-	}
-	if (apicid > max_physical_apicid)
-		max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-	 * but we need to work other dependencies like SMP_SUSPEND etc
-	 * before this can be done without some confusion.
-	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-	 *       - Ashok Raj <ashok.raj@intel.com>
-	 */
-	if (max_physical_apicid >= 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-	/* are we being called early in kernel startup? */
-	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-		cpu_to_apicid[cpu] = apicid;
-		bios_cpu_apicid[cpu] = apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-	}
-#endif
-
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
-}
-
-int hard_smp_processor_id(void)
-{
-	return read_apic_id();
-}
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-	/*
-	 * 'active' is true if the local APIC was enabled by us and
-	 * not the BIOS; this signifies that we are also responsible
-	 * for disabling it before entering apm/acpi suspend
-	 */
-	int active;
-	/* r/w apic fields */
-	unsigned int apic_id;
-	unsigned int apic_taskpri;
-	unsigned int apic_ldr;
-	unsigned int apic_dfr;
-	unsigned int apic_spiv;
-	unsigned int apic_lvtt;
-	unsigned int apic_lvtpc;
-	unsigned int apic_lvt0;
-	unsigned int apic_lvt1;
-	unsigned int apic_lvterr;
-	unsigned int apic_tmict;
-	unsigned int apic_tdcr;
-	unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	apic_pm_state.apic_id = apic_read(APIC_ID);
-	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	if (maxlvt >= 4)
-		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-	local_irq_save(flags);
-	disable_local_APIC();
-	local_irq_restore(flags);
-	return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-	unsigned int l, h;
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	local_irq_save(flags);
-
-#ifdef CONFIG_X86_64
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
-	{
-		/*
-		 * Make sure the APICBASE points to the right address
-		 *
-		 * FIXME! This will be wrong if we ever support suspend on
-		 * SMP! We'll need to do this as part of the CPU restore!
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
-	.resume		= lapic_resume,
-	.suspend	= lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-	apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
-	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else	/* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif	/* CONFIG_PM */
-
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-	int i, clusters, zeros;
-	unsigned id;
-	u16 *bios_cpu_apicid;
-	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
-	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-	for (i = 0; i < NR_CPUS; i++) {
-		/* are we being called early in kernel startup? */
-		if (bios_cpu_apicid) {
-			id = bios_cpu_apicid[i];
-		}
-		else if (i < nr_cpu_ids) {
-			if (cpu_present(i))
-				id = per_cpu(x86_bios_cpu_apicid, i);
-			else
-				continue;
-		}
-		else
-			break;
-
-		if (id != BAD_APICID)
-			__set_bit(APIC_CLUSTERID(id), clustermap);
-	}
-
-	/* Problem:  Partially populated chassis may not have CPUs in some of
-	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-	 * Since clusters are allocated sequentially, count zeros only if
-	 * they are bounded by ones.
-	 */
-	clusters = 0;
-	zeros = 0;
-	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-		if (test_bit(i, clustermap)) {
-			clusters += 1 + zeros;
-			zeros = 0;
-		} else
-			++zeros;
-	}
-
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
-		return 1;
-
-	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
-	 */
-	return (clusters > 2);
-}
-
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-	disable_apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_APIC);
-	return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-	return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-	local_apic_timer_c2_ok = 1;
-	return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-	if (!arg)  {
-#ifdef CONFIG_X86_64
-		skip_ioapic_setup = 0;
-		ioapic_force = 1;
-		return 0;
-#endif
-		return -EINVAL;
-	}
-
-	if (strcmp("debug", arg) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", arg) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-			" use apic=verbose or apic=debug\n", arg);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-	if (!apic_phys)
-		return -1;
-
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
-	return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53..f0dfe6f17e7 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,128 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <linux/io.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
+
+struct uv_systab uv_systab;
 
-const char *
-x86_bios_strerror(long status)
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-	const char *str;
-	switch (status) {
-	case  0: str = "Call completed without error";	break;
-	case -1: str = "Not implemented";		break;
-	case -2: str = "Invalid argument";		break;
-	case -3: str = "Call completed with error";	break;
-	default: str = "Unknown BIOS status code";	break;
-	}
-	return str;
+	struct uv_systab *tab = &uv_systab;
+
+	if (!tab->function)
+		/*
+		 * BIOS does not support UV systab
+		 */
+		return BIOS_STATUS_UNIMPLEMENTED;
+
+	return efi_call6((void *)__va(tab->function),
+					(u64)which, a1, a2, a3, a4, a5);
 }
 
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info)
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
 {
-	struct uv_bios_retval isrv;
+	unsigned long bios_flags;
+	s64 ret;
 
-	BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-	*ticks_per_second = isrv.v0;
-	*drift_info = isrv.v1;
-	return isrv.status;
+	local_irq_save(bios_flags);
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	local_irq_restore(bios_flags);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
+{
+	s64 ret;
+
+	preempt_disable();
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	preempt_enable();
+
+	return ret;
+}
+
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+/* Issue UV_BIOS_GET_SN_INFO and decode v0 into the non-NULL outputs. */
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+		long *region)
+{
+	s64 ret;
+	u64 v0, v1;
+	union partition_info_u part;
+
+	ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+				(u64)(&v0), (u64)(&v1), 0, 0);
+	if (ret != BIOS_STATUS_SUCCESS)
+		return ret;	/* outputs are left untouched on failure */
+
+	part.val = v0;	/* v1 is handed to the call but not read here */
+	if (uvtype)
+		*uvtype = part.hub_version;
+	if (partid)
+		*partid = part.partition_id;
+	if (coher)
+		*coher = part.coherence_id;
+	if (region)
+		*region = part.region_size;
+	return ret;
+}
+
+/* Issue UV_BIOS_FREQ_BASE for clock_type; ticks_per_second is the out arg. */
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
+{
+	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+			   (u64)ticks_per_second, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
+
+
+#ifdef CONFIG_EFI
+/* Map the EFI UV system table and cache a private copy in uv_systab. */
+void uv_bios_init(void)
+{
+	struct uv_systab *tab;
+
+	/* No entry point until a table is cached: uv_bios_call() then bails. */
+	uv_systab.function = (unsigned long)NULL;
+	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+	    (efi.uv_systab == (unsigned long)NULL)) {
+		printk(KERN_CRIT "No EFI UV System Table.\n");
+		return;
+	}
+
+	tab = (struct uv_systab *)ioremap(efi.uv_systab,
+					sizeof(struct uv_systab));
+	if (!tab)
+		return;
+	if (strncmp(tab->signature, "UVST", 4) != 0)
+		printk(KERN_ERR "bad signature in UV system table!\n");
+
+	/* Copy table to permanent spot; tab is invalid once unmapped. */
+	memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+	iounmap(tab);
+	printk(KERN_INFO "EFI UV System Table Revision %d\n", uv_systab.revision);
+}
+#else	/* !CONFIG_EFI */
+void uv_bios_init(void) { }
+#endif
+
diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore
new file mode 100644
index 00000000000..667df55a439
--- /dev/null
+++ b/arch/x86/kernel/cpu/.gitignore
@@ -0,0 +1 @@
+capflags.c
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf..8f1e31db2ad 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	}
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d5..b0461856acf 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a31..c1ac5790c63 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b7..7c7d56b4313 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
  *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d..008d23ba491 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
  *  Support : mark.langsdorf@amd.com
  *
  *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
  *  (C) 2004 Dominik Brodowski <linux@brodo.de>
  *  (C) 2004 Pavel Machek <pavel@suse.cz>
  *  Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61..04d0376b64b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08d..cce0b6118d5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f6635..dd3af6e7b39 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
 /*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8c..0ebf3fc6a61 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e..a74af128efc 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
 /*
  * Non Fatal Machine Check Exception Reporting
  *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  *
  * This file contains routines to check for non-fatal MCEs every 15s
  *
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f..9abd48b2267 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
 #include <asm/intel_arch_perfmon.h>
 
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
 	release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/* start the cycle over again */
 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
 	return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/*
 	 * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	unsigned dummy;
 	/*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
 	return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 ctr;
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 72d0c56c1b4..f7cdb3b457a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,6 +13,9 @@
 
 static void *kdump_buf_page;
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c..045b36cada6 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,9 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81..1119d247fe1 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -367,6 +367,10 @@ void __init efi_init(void)
 			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx ", config_tables[i].table);
 		} else if (!efi_guidcmp(config_tables[i].guid,
+					UV_SYSTEM_TABLE_GUID)) {
+			efi.uv_systab = config_tables[i].table;
+			printk(" UVsystab=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
 					HCDP_TABLE_GUID)) {
 			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx ", config_tables[i].table);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe3..c356423a602 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
 	ALIGN
  .if vector
 	CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	movl 0xc(%esp), %eax
-	subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	popl %edx
-	popl %ecx
-	popl %eax
-
 	ret
 END(mcount)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e..09e7145484c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
 #ifdef CONFIG_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-
-	subq $0x38, %rsp
-	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-
-	movq 0x38(%rsp), %rdi
-	subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $0x38, %rsp
-
 	retq
 END(mcount)
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fd..d073d981a73 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,17 +11,18 @@
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
-#include <asm/alternative.h>
 #include <asm/ftrace.h>
+#include <asm/nops.h>
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
 
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
-	unsigned replaced;
-	unsigned old = *(unsigned *)old_code; /* 4 bytes */
-	unsigned new = *(unsigned *)new_code; /* 4 bytes */
-	unsigned char newch = new_code[4];
-	int faulted = 0;
+	unsigned char replaced[MCOUNT_INSN_SIZE];
 
 	/*
 	 * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 *  as well as code changing.
 	 *
 	 * No real locking needed, this code is run through
-	 * kstop_machine.
+	 * kstop_machine, or before SMP starts.
 	 */
-	asm volatile (
-		"1: lock\n"
-		"   cmpxchg %3, (%2)\n"
-		"   jnz 2f\n"
-		"   movb %b4, 4(%2)\n"
-		"2:\n"
-		".section .fixup, \"ax\"\n"
-		"3:	movl $1, %0\n"
-		"	jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: "=r"(faulted), "=a"(replaced)
-		: "r"(ip), "r"(new), "c"(newch),
-		  "0"(faulted), "a"(old)
-		: "memory");
-	sync_core();
+	if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+		return 1;
+
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return 2;
 
-	if (replaced != old && replaced != new)
-		faulted = 2;
+	WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
+				    MCOUNT_INSN_SIZE));
 
-	return faulted;
+	sync_core();
+
+	return 0;
 }
 
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 
 notrace int ftrace_mcount_set(unsigned long *data)
 {
-	unsigned long ip = (long)(&mcount_call);
-	unsigned long *addr = data;
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-	/*
-	 * Replace the mcount stub with a pointer to the
-	 * ip recorder function.
-	 */
-	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, *addr);
-	*addr = ftrace_modify_code(ip, old, new);
-
+	/* mcount is initialized as a nop */
+	*data = 0;
 	return 0;
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
-	/* This is running in kstop_machine */
-
-	ftrace_mcount_set(data);
+	extern const unsigned char ftrace_test_p6nop[];
+	extern const unsigned char ftrace_test_nop5[];
+	extern const unsigned char ftrace_test_jmp[];
+	int faulted = 0;
 
-	ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+	/*
+	 * There is no good nop for all x86 archs.
+	 * We will default to using the P6_NOP5, but first we
+	 * will test to make sure that the nop will actually
+	 * work on this CPU. If it faults, we will then
+	 * go to a lesser efficient 5 byte nop. If that fails
+	 * we then just use a jmp as our nop. This isn't the most
+	 * efficient nop, but we can not use a multi part nop
+	 * since we would then risk being preempted in the middle
+	 * of that nop, and if we enabled tracing then, it might
+	 * cause a system crash.
+	 *
+	 * TODO: check the cpuid to determine the best nop.
+	 */
+	asm volatile (
+		"jmp ftrace_test_jmp\n"
+		/* This code needs to stay around */
+		".section .text, \"ax\"\n"
+		"ftrace_test_jmp:"
+		"jmp ftrace_test_p6nop\n"
+		"nop\n"
+		"nop\n"
+		"nop\n"  /* 2 byte jmp + 3 bytes */
+		"ftrace_test_p6nop:"
+		P6_NOP5
+		"jmp 1f\n"
+		"ftrace_test_nop5:"
+		".byte 0x66,0x66,0x66,0x66,0x90\n"
+		"jmp 1f\n"
+		".previous\n"
+		"1:"
+		".section .fixup, \"ax\"\n"
+		"2:	movl $1, %0\n"
+		"	jmp ftrace_test_nop5\n"
+		"3:	movl $2, %0\n"
+		"	jmp 1b\n"
+		".previous\n"
+		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
+		_ASM_EXTABLE(ftrace_test_nop5, 3b)
+		: "=r"(faulted) : "0" (faulted));
+
+	switch (faulted) {
+	case 0:
+		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+		ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+		break;
+	case 1:
+		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+		ftrace_nop = (unsigned long *)ftrace_test_nop5;
+		break;
+	case 2:
+		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+		ftrace_nop = (unsigned long *)ftrace_test_jmp;
+		break;
+	}
+
+	/* The return code is returned via data */
+	*(unsigned long *)data = 0;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b..2ec2de8d8c4 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	 * is an example).
 	 */
 	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "system APIC only can use physical flat");
 		return 1;
+	}
 #endif
 
 	return 0;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90..bfd532843df 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
 
 static __init void uv_rtc_init(void)
 {
-	long status, ticks_per_sec, drift;
+	long status;
+	u64 ticks_per_sec;
 
-	status =
-	    x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
-					&drift);
-	if (status != 0 || ticks_per_sec < 100000) {
+	status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+					&ticks_per_sec);
+	if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
 		printk(KERN_WARNING
 			"unable to determine platform RTC clock frequency, "
 			"guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * 	ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+	/* CPU 0 initialization will be done via uv_system_init. */
+	if (!uv_blade_info)
+		return;
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
 
 void __init uv_system_init(void)
 {
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
 	gnode_upper = (((unsigned long)node_id.s.node_id) &
 		       ~((1 << n_val) - 1)) << m_val;
 
+	uv_bios_init();
+	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+			    &uv_coherency_id, &uv_region_size);
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_config_high(max_pnode);
 	map_mmioh_high(max_pnode);
-	uv_system_inited = true;
-}
 
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-	BUG_ON(!uv_system_inited);
-
-	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-		set_x2apic_extra_bits(uv_hub_info->pnode);
+	uv_cpu_init();
 }
-
-
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233d..77017e834cf 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,29 +1,49 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/cpu.h>
 #include <linux/pm.h>
+#include <linux/io.h>
 
 #include <asm/fixmap.h>
-#include <asm/hpet.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
+#include <asm/hpet.h>
 
-#define HPET_MASK	CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT	22
+#define HPET_MASK			CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT			22
 
 /* FSEC = 10^-15
    NSEC = 10^-9 */
-#define FSEC_PER_NSEC	1000000L
+#define FSEC_PER_NSEC			1000000L
+
+#define HPET_DEV_USED_BIT		2
+#define HPET_DEV_USED			(1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID			0x8
+#define HPET_DEV_FSB_CAP		0x1000
+#define HPET_DEV_PERI_CAP		0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long				hpet_address;
+unsigned long				hpet_num_timers;
+static void __iomem			*hpet_virt_address;
+
+struct hpet_dev {	/* state of one HPET timer driven through MSI */
+	struct clock_event_device	evt;	/* see EVT_TO_HPET_DEV() */
+	unsigned int			num;	/* HPET timer index */
+	int				cpu;	/* CPU the timer is bound to */
+	unsigned int			irq;	/* irq from create_irq() */
+	unsigned long			flags;	/* HPET_DEV_*; long for bitops */
+	char				name[10];	/* "hpet%d" */
+};
 
 unsigned long hpet_readl(unsigned long a)
 {
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
 static int boot_hpet_disable;
 int hpet_force_user;
 
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
 {
 	if (str) {
 		if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
 
 static inline int is_hpet_capable(void)
 {
-	return (!boot_hpet_disable && hpet_address);
+	return !boot_hpet_disable && hpet_address;
 }
 
 /*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
+
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
+
 static void hpet_reserve_platform_timers(unsigned long id)
 {
 	struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 
 	nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-	memset(&hd, 0, sizeof (hd));
-	hd.hd_phys_address = hpet_address;
-	hd.hd_address = hpet;
-	hd.hd_nirqs = nrtimers;
+	memset(&hd, 0, sizeof(hd));
+	hd.hd_phys_address	= hpet_address;
+	hd.hd_address		= hpet;
+	hd.hd_nirqs		= nrtimers;
 	hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
 	for (i = 2; i < nrtimers; timer++, i++) {
-		hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
-			Tn_INT_ROUTE_CNF_SHIFT;
+		hd.hd_irq[i] = (readl(&timer->hpet_config) &
+			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
+	hpet_reserve_msi_timers(&hd);
+
 	hpet_alloc(&hd);
 
 }
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
-			  struct clock_event_device *evt)
+static int hpet_setup_msi_irq(unsigned int irq);
+
+static void hpet_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt, int timer)
 {
 	unsigned long cfg, cmp, now;
 	uint64_t delta;
 
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
-		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
-		delta >>= hpet_clockevent.shift;
+		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+		delta >>= evt->shift;
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		/*
 		 * The first write after writing TN_SETVAL to the
 		 * config register sets the counter value, the second
 		 * write sets the period.
 		 */
-		hpet_writel(cmp, HPET_T0_CMP);
+		hpet_writel(cmp, HPET_Tn_CMP(timer));
 		udelay(1);
-		hpet_writel((unsigned long) delta, HPET_T0_CMP);
+		hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_PERIODIC;
 		cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
-		hpet_enable_legacy_int();
+		if (timer == 0) {
+			hpet_enable_legacy_int();
+		} else {
+			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+			hpet_setup_msi_irq(hdev->irq);
+			disable_irq(hdev->irq);
+			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			enable_irq(hdev->irq);
+		}
 		break;
 	}
 }
 
-static int hpet_legacy_next_event(unsigned long delta,
-				  struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+			   struct clock_event_device *evt, int timer)
 {
 	u32 cnt;
 
 	cnt = hpet_readl(HPET_COUNTER);
 	cnt += (u32) delta;
-	hpet_writel(cnt, HPET_T0_CMP);
+	hpet_writel(cnt, HPET_Tn_CMP(timer));
 
 	/*
 	 * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
 
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *evt)
+{
+	hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+			struct clock_event_device *evt)
+{
+	return hpet_next_event(delta, evt, 0);
+}
+
+/*
+ * HPET MSI Support
+ */
+#ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev	*hpet_devs;
+
+void hpet_msi_unmask(unsigned int irq)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+	unsigned long cfg;
+
+	/* unmask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg |= HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+	unsigned long cfg;
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	/* mask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg &= ~HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+	hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+	msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+	msg->address_hi = 0;
+}
+
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	if (arch_setup_hpet_msi(irq)) {
+		destroy_irq(irq);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+	unsigned int irq;
+
+	irq = create_irq();
+	if (!irq)
+		return -EINVAL;
+
+	set_irq_data(irq, dev);
+
+	if (hpet_setup_msi_irq(irq))
+		return -EINVAL;
+
+	dev->irq = irq;
+	return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+	struct hpet_dev *dev = (struct hpet_dev *)data;
+	struct clock_event_device *hevt = &dev->evt;
+
+	if (!hevt->event_handler) {
+		printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+				dev->num);
+		return IRQ_HANDLED;
+	}
+
+	hevt->event_handler(hevt);
+	return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+	if (request_irq(dev->irq, hpet_interrupt_handler,
+			IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+		return -1;
+
+	disable_irq(dev->irq);
+	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	enable_irq(dev->irq);
+
+	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+			 dev->name, dev->irq);
+
+	return 0;
+}
+
+/* Must be called on @cpu itself (WARN_ON checks against smp_processor_id()) */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+	struct clock_event_device *evt = &hdev->evt;
+	uint64_t hpet_freq;
+
+	WARN_ON(cpu != smp_processor_id());
+	if (!(hdev->flags & HPET_DEV_VALID))
+		return;
+
+	if (hpet_setup_msi_irq(hdev->irq))
+		return;
+
+	hdev->cpu = cpu;
+	per_cpu(cpu_hpet_dev, cpu) = hdev;
+	evt->name = hdev->name;
+	hpet_setup_irq(hdev);
+	evt->irq = hdev->irq;
+
+	evt->rating = 110;
+	evt->features = CLOCK_EVT_FEAT_ONESHOT;
+	if (hdev->flags & HPET_DEV_PERI_CAP)
+		evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+	evt->set_mode = hpet_msi_set_mode;
+	evt->set_next_event = hpet_msi_next_event;
+	evt->shift = 32;
+
+	/*
+	 * The period is a femto seconds value. We need to calculate the
+	 * scaled math multiplication factor for nanosecond to hpet tick
+	 * conversion.
+	 */
+	hpet_freq = 1000000000000000ULL;
+	do_div(hpet_freq, hpet_period);
+	evt->mult = div_sc((unsigned long) hpet_freq,
+				      NSEC_PER_SEC, evt->shift);
+	/* Calculate the max delta */
+	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+	/* 5 usec minimum reprogramming delta. */
+	evt->min_delta_ns = 5000;
+
+	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+
+/* Give each MSI-capable timer from @start_timer on an irq, at most one per CPU */
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	unsigned int id, num_timers;
+	unsigned int num_timers_used = 0;
+	int i;
+
+	id = hpet_readl(HPET_ID);
+
+	num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+	num_timers++; /* Value read out starts from 0 */
+
+	hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
+	if (!hpet_devs)
+		return;
+
+	hpet_num_timers = num_timers;
+
+	for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+		struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+		unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+		/* Only consider HPET timer with MSI support */
+		if (!(cfg & HPET_TN_FSB_CAP))
+			continue;
+
+		hdev->flags = 0;
+		if (cfg & HPET_TN_PERIODIC_CAP)
+			hdev->flags |= HPET_DEV_PERI_CAP;
+		hdev->num = i;
+
+		snprintf(hdev->name, sizeof(hdev->name), "hpet%d", i);
+		if (hpet_assign_irq(hdev))
+			continue;	/* slot is reused for the next candidate */
+
+		hdev->flags |= HPET_DEV_FSB_CAP;
+		hdev->flags |= HPET_DEV_VALID;
+		num_timers_used++;
+		if (num_timers_used == num_possible_cpus())
+			break;
+	}
+
+	printk(KERN_INFO "HPET: %u timers in total, %u timers will be used for per-cpu timer\n",
+		num_timers, num_timers_used);
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	int i;
+
+	if (!hpet_devs)
+		return;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd->hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(hd, hdev->num);
+	}
+}
+#endif
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+	int i;
+
+	if (!hpet_devs)
+		return NULL;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+		if (test_and_set_bit(HPET_DEV_USED_BIT,
+			(unsigned long *)&hdev->flags))
+			continue;
+		return hdev;
+	}
+	return NULL;
+}
+
+struct hpet_work_struct {
+	struct delayed_work work;
+	struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+	struct hpet_dev *hdev;
+	int cpu = smp_processor_id();
+	struct hpet_work_struct *hpet_work;
+
+	hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+	hdev = hpet_get_unused_timer();
+	if (hdev)
+		init_one_hpet_msi_clockevent(hdev, cpu);
+
+	complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	unsigned long cpu = (unsigned long)hcpu;
+	struct hpet_work_struct work;	/* lives on this stack frame */
+	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+		INIT_DELAYED_WORK(&work.work, hpet_work);
+		init_completion(&work.complete);
+		/* FIXME: add schedule_work_on() */
+		schedule_delayed_work_on(cpu, &work.work, 0);
+		wait_for_completion(&work.complete);	/* work is on our stack */
+		break;
+	case CPU_DEAD:
+		if (hdev) {
+			free_irq(hdev->irq, hdev);
+			hdev->flags &= ~HPET_DEV_USED;	/* timer can be claimed again */
+			per_cpu(cpu_hpet_dev, cpu) = NULL;
+		}
+		break;
+	}
+	return NOTIFY_OK;
+}
+#else
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	return 0;
+}
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	return;
+}
+#endif
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	return NOTIFY_OK;
+}
+
+#endif
+
 /*
  * Clock source related code
  */
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
 
 	if (id & HPET_ID_LEGSUP) {
 		hpet_legacy_clockevent_register();
+		hpet_msi_capability_lookup(2);
 		return 1;
 	}
+	hpet_msi_capability_lookup(0);
 	return 0;
 
 out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
  */
 static __init int hpet_late_init(void)
 {
+	int cpu;
+
 	if (boot_hpet_disable)
 		return -ENODEV;
 
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
 
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
+	for_each_online_cpu(cpu) {
+		hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+	}
+
+	/* This notifier should be called after workqueue is ready */
+	hotcpu_notifier(hpet_cpuhp_notify, -20);
+
 	return 0;
 }
 fs_initcall(hpet_late_init);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c
index 02063ae042f..b764d7429c6 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic.c
@@ -27,17 +27,21 @@
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -46,61 +50,28 @@
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #define __apicdebuginit(type) static type __init
 
-struct irq_cfg {
-	cpumask_t domain;
-	cpumask_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+	/* disable IO-APIC */
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_pin_list;
+struct irq_cfg {
+	unsigned int irq;
+	struct irq_pin_list *irq_2_pin;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
+	u8 vector;
+	u8 move_in_progress : 1;
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+#define for_each_irq_cfg(irq, cfg)		\
+	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	return irq_cfg(irq);
+}
+
 /*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
+ * Rough estimation of how many shared IRQs there are, can be changed
+ * anytime.
  */
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-	short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
+static struct irq_pin_list *irq_2_pin_ptr;
+
+static void __init irq_2_pin_init(void)
+{
+	struct irq_pin_list *pin = irq_2_pin_head;
+	int i;
+
+	for (i = 1; i < PIN_MAP_SIZE; i++)
+		pin[i-1].next = &pin[i];	/* chain the static array */
+
+	irq_2_pin_ptr = &pin[0];	/* get_one_free_irq_2_pin() pops from here */
+}
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin = irq_2_pin_ptr;	/* free-list head */
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");	/* free list empty */
+
+	irq_2_pin_ptr = pin->next;	/* pop it off the free list */
+	pin->next = NULL;		/* hand it out unlinked */
+	return pin;
+}
 
 struct io_apic {
 	unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 /*
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+	if (sis_apic_bug)
+		writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = irq_2_pin + irq;
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
 		int pin;
 
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
 		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 		}
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_pin_list *entry = irq_2_pin + irq;			\
-									\
-	BUG_ON(irq >= NR_IRQS);						\
-	for (;;) {							\
-		unsigned int reg;					\
-		pin = entry->pin;					\
-		if (pin == -1)						\
-			break;						\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = irq_2_pin + entry->next;			\
-	}								\
-}
-
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	int apic, pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 
-	BUG_ON(irq >= NR_IRQS);
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
+
+		if (!entry)
+			break;
+
 		apic = entry->apic;
 		pin = entry->pin;
-		if (pin == -1)
-			break;
+#ifdef CONFIG_INTR_REMAP
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
 		 */
 		if (!irq_remapped(irq))
 			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
-		io_apic_modify(apic, reg);
+		io_apic_modify(apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	cfg = irq_cfg(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
-
 	/*
 	 * Only the high 8 bits are valid.
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	static int first_free_entry = NR_IRQS;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	/* first reference to this irq's irq_cfg, so go through the allocator */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		return;
+	}
 
-	BUG_ON(irq >= NR_IRQS);
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
+	for (;; entry = entry->next) {
+		/* not again, please: check every entry, the tail included */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
 
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: ran out of irq_2_pin entries!");
+		if (!entry->next)
+			break;	/* entry is the tail: append after it */
 	}
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
 }
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
 
-	while (1) {
+	while (entry) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-		}
-		if (!entry->next)
+			replaced = 1;
+			/* every one is different, right? */
 			break;
-		entry = irq_2_pin + entry->next;
+		}
+		entry = entry->next;
+	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
+}
+
+static inline void io_apic_modify_irq(unsigned int irq,
+				int mask_and, int mask_or,
+				void (*final)(struct irq_pin_list *entry))
+{
+	int pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+		unsigned int reg;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+		reg &= mask_and;
+		reg |= mask_or;
+		io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+		if (final)
+			final(entry);
 	}
 }
 
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
 
-#define DO_ACTION(name,R,ACTION, FINAL)					\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
+#ifdef CONFIG_X86_64
+static void io_apic_sync(struct irq_pin_list *entry)
+{
+	/*
+	 * Synchronize the IO-APIC and the CPU by doing
+	 * a dummy read from the IO-APIC that @entry refers to
+	 */
+	struct io_apic __iomem *io_apic;
+	io_apic = io_apic_base(entry->apic);
+	readl(&io_apic->data);
+}
 
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
 
-/* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+			IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /*
  * Saves and masks all the unmasked IO-APIC RTE's
  */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
 			kzalloc(sizeof(struct IO_APIC_route_entry) *
 				nr_ioapic_registers[apic], GFP_KERNEL);
 		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
+			goto nomem;
 	}
 
 	for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
 				ioapic_write_entry(apic, pin, entry);
 			}
 		}
+
 	return 0;
+
+nomem:
+	while (apic >= 0)
+		kfree(early_ioapic_entries[apic--]);
+	/* Clear the whole table: the size is in bytes, not in elements. */
+	memset(early_ioapic_entries, 0, sizeof(early_ioapic_entries));
+
+	return -ENOMEM;
 }
 
 void restore_IO_APIC_setup(void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++)
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		if (!early_ioapic_entries[apic])
+			break;
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 			ioapic_write_entry(apic, pin,
 					   early_ioapic_entries[apic][pin]);
+		kfree(early_ioapic_entries[apic]);
+		early_ioapic_entries[apic] = NULL;
+	}
 }
 
 void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
 	 */
 	restore_IO_APIC_setup();
 }
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
-	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 }
 
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+#endif
+
 /* ISA interrupts are always polarity zero edge triggered,
  * when listed as conforming in the MP table. */
 
 #define default_ISA_trigger(idx)	(0)
 #define default_ISA_polarity(idx)	(0)
 
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
+
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
 
 #define default_PCI_trigger(idx)	(1)
 #define default_PCI_polarity(idx)	(1)
 
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)	(1)
+#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
+
 static int MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
 				trigger = default_ISA_trigger(idx);
 			else
 				trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
 			break;
 		case 1: /* edge */
 		{
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
+		/*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+		if (ioapic_renumber_irq)
+			irq = ioapic_renumber_irq(apic, irq);
 	}
-	BUG_ON(irq >= NR_IRQS);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+#endif
+
 	return irq;
 }
 
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	int cpu;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
 		}
 		if (unlikely(current_vector == vector))
 			continue;
+#ifdef CONFIG_X86_64
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
+#else
+		if (vector == SYSCALL_VECTOR)
+			goto next;
+#endif
 		for_each_cpu_mask_nr(new_cpu, new_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
 	cpumask_t mask;
 	int cpu, vector;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
 	/* Initialize vector_irq on a new cpu */
 	/* This function must be called with vector_lock held */
 	int irq, vector;
+	struct irq_cfg *cfg;
 
 	/* Mark the inuse vectors */
-	for (irq = 0; irq < NR_IRQS; ++irq) {
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+	for_each_irq_cfg(irq, cfg) {
+		if (!cpu_isset(cpu, cfg->domain))
 			continue;
-		vector = irq_cfg[irq].vector;
+		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
 		irq = per_cpu(vector_irq, cpu)[vector];
 		if (irq < 0)
 			continue;
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 #endif
 
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
+         * nonexistent IRQs are edge default
+         */
+	return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	return 1;
+}
+#endif
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-	if (trigger)
-		irq_desc[irq].status |= IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
+		desc->status |= IRQ_LEVEL;
 	else
-		irq_desc[irq].status &= ~IRQ_LEVEL;
+		desc->status &= ~IRQ_LEVEL;
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		desc->status |= IRQ_MOVE_PCNTXT;
 		if (trigger)
 			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 						      handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 		return;
 	}
 #endif
-	if (trigger)
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_fasteoi_irq,
 					      "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			      int trigger, int polarity)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	cpumask_t mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
+	cfg = irq_cfg(irq);
+
 	mask = TARGET_CPUS;
 	if (assign_irq_vector(irq, mask))
 		return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic, pin, idx, irq, first_notcon = 1;
+	int apic, pin, idx, irq;
+	int notcon = 0;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-		irq = pin_2_irq(idx, apic, pin);
-		add_pin_to_irq(irq, apic, pin);
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if (idx == -1) {
+				if (!notcon) {
+					notcon = 1;
+					apic_printk(APIC_VERBOSE,
+						KERN_DEBUG " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
+				} else
+					apic_printk(APIC_VERBOSE, " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
+				continue;
+			}
+			if (notcon) {
+				apic_printk(APIC_VERBOSE,
+					" (apicid-pin) not connected\n");
+				notcon = 0;
+			}
 
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
+			irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+			if (multi_timer_check(apic, irq))
+				continue;
+#endif
+			add_pin_to_irq(irq, apic, pin);
+
+			setup_IO_APIC_irq(apic, pin, irq,
+					irq_trigger(idx), irq_polarity(idx));
+		}
 	}
 
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
+	if (notcon)
+		apic_printk(APIC_VERBOSE,
+			" (apicid-pin) not connected\n");
 }
 
 /*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
 		return;
+#endif
 
 	memset(&entry, 0, sizeof(entry));
 
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
+	struct irq_cfg *cfg;
+	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
 	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 
-	if (reg_01.bits.version >= 0x10) {
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 	}
 
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+	}
+
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
 	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
+	for_each_irq_cfg(irq, cfg) {
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
+		printk(KERN_DEBUG "IRQ%d ", irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
 				break;
-			entry = irq_2_pin + entry->next;
+			entry = entry->next;
 		}
 		printk("\n");
 	}
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
 	unsigned int v, ver, maxlvt;
-	unsigned long icr;
+	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-	v = apic_read(APIC_ARBPRI);
-	printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-		v & APIC_ARBPRI_MASK);
-	v = apic_read(APIC_PROCPRI);
-	printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			       v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	}
 
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
 	v = apic_read(APIC_LDR);
 	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	}
 	v = apic_read(APIC_SPIV);
 	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC IRR field:\n");
 	print_APIC_bitfield(APIC_IRR);
 
-	v = apic_read(APIC_ESR);
-	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
 
 	icr = apic_icr_read();
 	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 
 __apicdebuginit(void) print_all_local_APICs(void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1);
+	int cpu;
+
+	preempt_disable();
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	preempt_enable();
 }
 
 __apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where, if anywhere, the i8259 is connected in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
-	int i, apic;
+	int apic;
 	unsigned long flags;
 
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
+#ifdef CONFIG_X86_32
+	int i;
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+#endif
 
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
 	}
  found_i8259:
 	/* Look to see what if the MP table has reported the ExtINT */
+	/* If we could not find the appropriate pin by looking at the ioapic
+	 * the i8259 probably is not connected to the ioapic but give the
+	 * mptable a chance anyway.
+	 */
 	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
 	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
 	/* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int apic;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+		return;
+
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+		/* Read the register 0 value */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		old_id = mp_ioapics[apic].mp_apicid;
+
+		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+		}
+
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (check_apicid_used(phys_id_present_map,
+					mp_ioapics[apic].mp_apicid)) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			mp_ioapics[apic].mp_apicid = i;
+		} else {
+			physid_mask_t tmp;
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mp_ioapics[apic].mp_apicid);
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mp_ioapics[apic].mp_apicid)
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].mp_dstapic == old_id)
+					mp_irqs[i].mp_dstapic
+						= mp_ioapics[apic].mp_apicid;
+
+		/*
+		 * Read the right value from the MPC table and
+		 * write it into the ID register.
+		 */
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mp_ioapics[apic].mp_apicid);
+
+		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0, reg_00.raw);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+			printk("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
 	unsigned long t1 = jiffies;
 	unsigned long flags;
 
+	if (no_timer_check)
+		return 1;
+
 	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
 
 	return 1;
 }
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+	send_IPI_self(irq_cfg(irq)->vector);
+
+	return 1;
+}
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  */
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
 		__target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 }
 
 static int migrate_irq_remapped_level(int irq)
 {
 	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	mask_IO_APIC_irq(irq);
 
 	if (io_apic_level_ack_pending(irq)) {
 		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
+		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
 		 * the EOI broadcast performed by cpu.
 		 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+	migrate_ioapic_irq(irq, desc->pending_mask);
 
 	ret = 0;
-	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(irq_desc[irq].pending_mask);
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
 
 static void ir_irq_migration(struct work_struct *work)
 {
-	int irq;
+	unsigned int irq;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
-		struct irq_desc *desc = irq_desc + irq;
+	for_each_irq_desc(irq, desc) {
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq,
-					         irq_desc[irq].pending_mask);
+			desc->chip->set_affinity(irq, desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
  */
 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	if (irq_desc[irq].status & IRQ_LEVEL) {
-		irq_desc[irq].status |= IRQ_MOVE_PENDING;
-		irq_desc[irq].pending_mask = mask;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
 		migrate_irq_remapped_level(irq);
 		return;
 	}
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
 	ack_APIC_irq();
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 
 	me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
-		if (irq >= NR_IRQS)
+
+		desc = irq_to_desc(irq);
+		if (!desc)
 			continue;
 
-		desc = irq_desc + irq;
-		cfg = irq_cfg + irq;
+		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
 			goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+atomic_t irq_mis_count;
+
 static void ack_apic_level(unsigned int irq)
 {
+#ifdef CONFIG_X86_32
+	unsigned long v;
+	int i;
+#endif
 	int do_unmask_irq = 0;
 
 	irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_IO_APIC_irq(irq);
 	}
 #endif
 
+#ifdef CONFIG_X86_32
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
 	 * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
 			move_masked_irq(irq);
 		unmask_IO_APIC_irq(irq);
 	}
+
+#ifdef CONFIG_X86_32
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+#endif
 }
 
 static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
+	.name		= "IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_apic_edge,
+	.eoi		= ack_apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
+	.set_affinity	= set_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
 
 #ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
+	.name		= "IR-IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_x2apic_edge,
+	.eoi		= ack_x2apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
+	.set_affinity	= set_ir_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+	for_each_irq_cfg(irq, cfg) {
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else
+			else {
+				desc = irq_to_desc(irq);
 				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
+				desc->chip = &no_irq_chip;
+			}
 		}
 	}
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void mask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-	irq_desc[irq].status &= ~IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 }
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
 static void __init setup_nmi(void)
 {
 	/*
- 	 * Dirty trick to enable the NMI watchdog ...
+	 * Dirty trick to enable the NMI watchdog ...
 	 * We put the 8259A master into AEOI mode and
 	 * unmask on all local APICs LVT0 as NMI.
 	 *
 	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 	 * is from Maciej W. Rozycki - so we do not have to EOI from
 	 * the NMI handler or the timer interrupt.
-	 */ 
-	printk(KERN_INFO "activating NMI Watchdog ...");
+	 */
+	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
 	enable_NMI_through_LVT0();
 
-	printk(" done.\n");
+	apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	unsigned char save_control, save_freq_select;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 	apic = find_isa_irq_apic(8, mp_INT);
-	if (pin == -1)
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
 		return;
+	}
 
 	entry0 = ioapic_read_entry(apic, pin);
-
 	clear_IO_APIC_pin(apic, pin);
 
 	memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  *
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg + 0;
+	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
+	unsigned int ver;
 	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
+	ver = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(ver);
+
 	/*
 	 * get/set the timer IRQ vector:
 	 */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.
+	 * wire has to be disabled in the local APIC.  Also
+	 * timer interrupts need to be acknowledged manually in
+	 * the 8259A for the i82489DX when using the NMI
+	 * watchdog as that APIC treats NMIs as level-triggered.
+	 * The AEOI mode will finish them in the 8259A
+	 * automatically.
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
+#ifdef CONFIG_X86_32
+	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
-		if (!no_timer_check && timer_irq_works()) {
+		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 				enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
 			    "through the IO-APIC - disabling NMI Watchdog!\n");
 		nmi_watchdog = NMI_NONE;
 	}
+#ifdef CONFIG_X86_32
+	timer_ack = 0;
+#endif
 
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
 	local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1<<2)
+#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
 
+#ifdef CONFIG_X86_32
+	enable_IO_APIC();
+#else
 	/*
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
+#endif
 
 	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+	/*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+	if (!acpi_ioapic)
+		setup_ioapic_ids_from_mpc();
+#endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
 	check_timer();
 }
 
+/*
+ *      Called after all the initialization is done. If we didn't find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+	if (sis_apic_bug == -1)
+		sis_apic_bug = 0;
+	return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
 struct sysfs_ioapic_data {
 	struct sys_device dev;
 	struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	int irq;
-	int new;
+	unsigned int irq;
+	unsigned int new;
 	unsigned long flags;
+	struct irq_cfg *cfg_new;
+
+	irq_want = nr_irqs - 1;
 
-	irq = -ENOSPC;
+	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
+	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_cfg[new].vector != 0)
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		/* check whether we need to create one */
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq >= 0) {
+	if (irq > 0) {
 		dynamic_irq_init(irq);
 	}
 	return irq;
 }
 
+int create_irq(void)
+{
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
+}
+
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
 	cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	if (err)
 		return err;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 	if (index < 0) {
 		printk(KERN_ERR
 		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
+		       pci_name(dev));
 		return -ENOSPC;
 	}
 	return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_desc + irq;
+		struct irq_desc *desc = irq_to_desc(irq);
 		/*
 		 * irq migration in process context
 		 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 #endif
 		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
+	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
 	return 0;
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	int irq, ret;
+	unsigned int irq;
+	int ret;
+	unsigned int irq_want;
 
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+	if (irq == 0)
+		return -1;
 
 #ifdef CONFIG_INTR_REMAP
 	if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	int irq, ret, sub_handle;
+	unsigned int irq;
+	int ret, sub_handle;
 	struct msi_desc *desc;
+	unsigned int irq_want;
+
 #ifdef CONFIG_INTR_REMAP
 	struct intel_iommu *iommu = 0;
 	int index = 0;
 #endif
 
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
 	sub_handle = 0;
 	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq();
-		if (irq < 0)
-			return irq;
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
 #ifdef CONFIG_INTR_REMAP
 		if (!intr_remapping_enabled)
 			goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	hpet_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.unmask = hpet_msi_unmask,
+	.mask = hpet_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = hpet_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+		"edge");
+
+	return 0;
+}
+#endif
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	cpumask_t tmp;
 
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		struct ht_irq_msg msg;
 		unsigned dest;
 
+		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
+
+		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset)
+{
+	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long flags;
+	int err;
+
+	err = assign_irq_vector(irq, *eligible_cpu);
+	if (err != 0)
+		return err;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	cfg = irq_cfg(irq);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = INT_DELIVERY_MODE;
+	entry->dest_mode = INT_DEST_MODE;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int mmr_pnode;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->mask = 1;
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.entries;
+}
+
+int __init probe_nr_irqs(void)
+{
+	int idx;
+	int nr = 0;
+#ifndef CONFIG_XEN
+	int nr_min = 32;
+#else
+	int nr_min = NR_IRQS;
+#endif
+
+	for (idx = 0; idx < nr_ioapics; idx++)
+		nr += io_apic_get_redir_entries(idx) + 1;
+
+	/* double it for hotplug and msi and nmi */
+	nr <<= 1;
+
+	/* something wrong ? */
+	if (nr < nr_min)
+		nr = nr_min;
+
+	return nr;
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
 #ifdef CONFIG_ACPI
 
-#define IO_APIC_MAX_ID		0xFE
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
 
-int __init io_apic_get_redir_entries (int ioapic)
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
+	 * support up to 16 on one shared APIC bus.
+	 *
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (check_apicid_used(apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!check_apicid_used(apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	}
+
+	tmp = apicid_to_cpu_present(apic_id);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id) {
+			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			return -1;
+		}
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic)
 	reg_01.raw = io_apic_read(ioapic, 1);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
-	return reg_01.bits.entries;
+	return reg_01.bits.version;
 }
-
+#endif
 
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			if (!irq_cfg[irq].vector)
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
+	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+			if (!ioapic_phys) {
+				printk(KERN_ERR
+				       "WARNING: bogus zero IO-APIC "
+				       "address found in MPTABLE, "
+				       "disabling IO/APIC support!\n");
+				smp_found_config = 0;
+				skip_ioapic_setup = 1;
+				goto fake_ioapic_page;
+			}
+#endif
 		} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
 			ioapic_phys = (unsigned long)
 				alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
 		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %016lx (%016lx)\n",
+			    "mapped IOAPIC to %08lx (%08lx)\n",
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occured to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index e710289f673..00000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,2908 +0,0 @@
-/*
- *	Intel IO-APIC support for multi-Pentium hosts.
- *
- *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- *	Many thanks to Stig Venaas for trying out countless experimental
- *	patches and reporting/debugging problems patiently!
- *
- *	(c) 1999, Multiple IO-APIC support, developed by
- *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *	further tested and cleaned up by Zach Brown <zab@redhat.com>
- *	and Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively
- *	Paul Diefenbaugh	:	Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-
-#define __apicdebuginit(type) static type __init
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
-
-int timer_through_8259 __initdata;
-
-/*
- *	Is the SiS APIC rmw bug present ?
- *	-1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-static int disable_timer_pin_1 __initdata;
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
-	int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-struct io_apic {
-	unsigned int index;
-	unsigned int unused[3];
-	unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
-	if (sis_apic_bug)
-		writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-union entry_union {
-	struct { u32 w1, w2; };
-	struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-	union entry_union eu;
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	union entry_union eu;
-	eu.entry = e;
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__ioapic_write_entry(apic, pin, e);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-	unsigned long flags;
-	union entry_union eu = { .entry.mask = 1 };
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
-	static int first_free_entry = NR_IRQS;
-	struct irq_pin_list *entry = irq_2_pin + irq;
-
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
-
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
-	}
-	entry->apic = apic;
-	entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
-{
-	struct irq_pin_list *entry = irq_2_pin + irq;
-
-	while (1) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-		}
-		if (!entry->next)
-			break;
-		entry = irq_2_pin + entry->next;
-	}
-}
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
-	struct irq_pin_list *entry = irq_2_pin + irq;
-	unsigned int pin, reg;
-
-	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
-			break;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = irq_2_pin + entry->next;
-	}
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
-
-static void mask_IO_APIC_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-	struct IO_APIC_route_entry entry;
-
-	/* Check delivery_mode to be sure we're not clearing an SMI pin */
-	entry = ioapic_read_entry(apic, pin);
-	if (entry.delivery_mode == dest_SMI)
-		return;
-
-	/*
-	 * Disable it in the IO-APIC irq-routing table:
-	 */
-	ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC(void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			clear_IO_APIC_pin(apic, pin);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
-	unsigned long flags;
-	int pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
-	unsigned int apicid_value;
-	cpumask_t tmp;
-
-	cpus_and(tmp, cpumask, cpu_online_map);
-	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
-
-	cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
-	apicid_value = cpu_mask_to_apicid(cpumask);
-	/* Prepare to do the io_apic_write */
-	apicid_value = apicid_value << 24;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
-			break;
-		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
-		if (!entry->next)
-			break;
-		entry = irq_2_pin + entry->next;
-	}
-	irq_desc[irq].affinity = cpumask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>	/* kernel_thread() */
-# include <linux/kernel_stat.h>	/* kstat */
-# include <linux/slab.h>		/* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
-#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
-#define BALANCED_IRQ_LESS_DELTA		(HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-	unsigned long *last_irq;
-	unsigned long *irq_delta;
-	unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) 	(irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
-	[0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-			unsigned long now, int direction)
-{
-	int search_idle = 1;
-	int cpu = curr_cpu;
-
-	goto inside;
-
-	do {
-		if (unlikely(cpu == curr_cpu))
-			search_idle = 0;
-inside:
-		if (direction == 1) {
-			cpu++;
-			if (cpu >= NR_CPUS)
-				cpu = 0;
-		} else {
-			cpu--;
-			if (cpu == -1)
-				cpu = NR_CPUS-1;
-		}
-	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-			(search_idle && !IDLE_ENOUGH(cpu, now)));
-
-	return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-	unsigned long now = jiffies;
-	cpumask_t allowed_mask;
-	unsigned int new_cpu;
-
-	if (irqbalance_disabled)
-		return;
-
-	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-	new_cpu = move(cpu, allowed_mask, now, 1);
-	if (cpu != new_cpu)
-		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-	int i, j;
-
-	for_each_online_cpu(i) {
-		for (j = 0; j < NR_IRQS; j++) {
-			if (!irq_desc[j].action)
-				continue;
-			/* Is it a significant load ?  */
-			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-						useful_load_threshold)
-				continue;
-			balance_irq(i, j);
-		}
-	}
-	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-	return;
-}
-
-static void do_irq_balance(void)
-{
-	int i, j;
-	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-	unsigned long move_this_load = 0;
-	int max_loaded = 0, min_loaded = 0;
-	int load;
-	unsigned long useful_load_threshold = balanced_irq_interval + 10;
-	int selected_irq;
-	int tmp_loaded, first_attempt = 1;
-	unsigned long tmp_cpu_irq;
-	unsigned long imbalance = 0;
-	cpumask_t allowed_mask, target_cpu_mask, tmp;
-
-	for_each_possible_cpu(i) {
-		int package_index;
-		CPU_IRQ(i) = 0;
-		if (!cpu_online(i))
-			continue;
-		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < NR_IRQS; j++) {
-			unsigned long value_now, delta;
-			/* Is this an active IRQ or balancing disabled ? */
-			if (!irq_desc[j].action || irq_balancing_disabled(j))
-				continue;
-			if (package_index == i)
-				IRQ_DELTA(package_index, j) = 0;
-			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
-			/* Determine the activity per processor per IRQ */
-			delta = value_now - LAST_CPU_IRQ(i, j);
-
-			/* Update last_cpu_irq[][] for the next time */
-			LAST_CPU_IRQ(i, j) = value_now;
-
-			/* Ignore IRQs whose rate is less than the clock */
-			if (delta < useful_load_threshold)
-				continue;
-			/* update the load for the processor or package total */
-			IRQ_DELTA(package_index, j) += delta;
-
-			/* Keep track of the higher numbered sibling as well */
-			if (i != package_index)
-				CPU_IRQ(i) += delta;
-			/*
-			 * We have sibling A and sibling B in the package
-			 *
-			 * cpu_irq[A] = load for cpu A + load for cpu B
-			 * cpu_irq[B] = load for cpu B
-			 */
-			CPU_IRQ(package_index) += delta;
-		}
-	}
-	/* Find the least loaded processor package */
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (min_cpu_irq > CPU_IRQ(i)) {
-			min_cpu_irq = CPU_IRQ(i);
-			min_loaded = i;
-		}
-	}
-	max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-	/*
-	 * Look for heaviest loaded processor.
-	 * We may come back to get the next heaviest loaded processor.
-	 * Skip processors with trivial loads.
-	 */
-	tmp_cpu_irq = 0;
-	tmp_loaded = -1;
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (max_cpu_irq <= CPU_IRQ(i))
-			continue;
-		if (tmp_cpu_irq < CPU_IRQ(i)) {
-			tmp_cpu_irq = CPU_IRQ(i);
-			tmp_loaded = i;
-		}
-	}
-
-	if (tmp_loaded == -1) {
-	 /*
-	  * In the case of small number of heavy interrupt sources,
-	  * loading some of the cpus too much. We use Ingo's original
-	  * approach to rotate them around.
-	  */
-		if (!first_attempt && imbalance >= useful_load_threshold) {
-			rotate_irqs_among_cpus(useful_load_threshold);
-			return;
-		}
-		goto not_worth_the_effort;
-	}
-
-	first_attempt = 0;		/* heaviest search */
-	max_cpu_irq = tmp_cpu_irq;	/* load */
-	max_loaded = tmp_loaded;	/* processor */
-	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-	/*
-	 * if imbalance is less than approx 10% of max load, then
-	 * observe diminishing returns action. - quit
-	 */
-	if (imbalance < (max_cpu_irq >> 3))
-		goto not_worth_the_effort;
-
-tryanotherirq:
-	/* if we select an IRQ to move that can't go where we want, then
-	 * see if there is another one to try.
-	 */
-	move_this_load = 0;
-	selected_irq = -1;
-	for (j = 0; j < NR_IRQS; j++) {
-		/* Is this an active IRQ? */
-		if (!irq_desc[j].action)
-			continue;
-		if (imbalance <= IRQ_DELTA(max_loaded, j))
-			continue;
-		/* Try to find the IRQ that is closest to the imbalance
-		 * without going over.
-		 */
-		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-			move_this_load = IRQ_DELTA(max_loaded, j);
-			selected_irq = j;
-		}
-	}
-	if (selected_irq == -1)
-		goto tryanothercpu;
-
-	imbalance = move_this_load;
-
-	/* For physical_balance case, we accumulated both load
-	 * values in the one of the siblings cpu_irq[],
-	 * to use the same code for physical and logical processors
-	 * as much as possible.
-	 *
-	 * NOTE: the cpu_irq[] array holds the sum of the load for
-	 * sibling A and sibling B in the slot for the lowest numbered
-	 * sibling (A), _AND_ the load for sibling B in the slot for
-	 * the higher numbered sibling.
-	 *
-	 * We seek the least loaded sibling by making the comparison
-	 * (A+B)/2 vs B
-	 */
-	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-		if (load > CPU_IRQ(j)) {
-			/* This won't change cpu_sibling_map[min_loaded] */
-			load = CPU_IRQ(j);
-			min_loaded = j;
-		}
-	}
-
-	cpus_and(allowed_mask,
-		cpu_online_map,
-		balance_irq_affinity[selected_irq]);
-	target_cpu_mask = cpumask_of_cpu(min_loaded);
-	cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-	if (!cpus_empty(tmp)) {
-		/* mark for change destination */
-		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-		/* Since we made a change, come back sooner to
-		 * check for more variation.
-		 */
-		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-		return;
-	}
-	goto tryanotherirq;
-
-not_worth_the_effort:
-	/*
-	 * if we did not find an IRQ to move, then adjust the time interval
-	 * upward
-	 */
-	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-	return;
-}
-
-static int balanced_irq(void *unused)
-{
-	int i;
-	unsigned long prev_balance_time = jiffies;
-	long time_remaining = balanced_irq_interval;
-
-	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < NR_IRQS ; i++) {
-		irq_desc[i].pending_mask = cpumask_of_cpu(0);
-		set_pending_irq(i, cpumask_of_cpu(0));
-	}
-
-	set_freezable();
-	for ( ; ; ) {
-		time_remaining = schedule_timeout_interruptible(time_remaining);
-		try_to_freeze();
-		if (time_after(jiffies,
-				prev_balance_time+balanced_irq_interval)) {
-			preempt_disable();
-			do_irq_balance();
-			prev_balance_time = jiffies;
-			time_remaining = balanced_irq_interval;
-			preempt_enable();
-		}
-	}
-	return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-	cpumask_t tmp;
-
-	cpus_shift_right(tmp, cpu_online_map, 2);
-	c = &boot_cpu_data;
-	/* When not overwritten by the command line ask subarchitecture. */
-	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-		irqbalance_disabled = NO_BALANCE_IRQ;
-	if (irqbalance_disabled)
-		return 0;
-
-	 /* disable irqbalance completely if there is only one processor online */
-	if (num_online_cpus() < 2) {
-		irqbalance_disabled = 1;
-		return 0;
-	}
-	/*
-	 * Enable physical balance only if more than 1 physical processor
-	 * is present
-	 */
-	if (smp_num_siblings > 1 && !cpus_empty(tmp))
-		physical_balance = 1;
-
-	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-			printk(KERN_ERR "balanced_irq_init: out of memory");
-			goto failed;
-		}
-	}
-
-	printk(KERN_INFO "Starting balanced_irq\n");
-	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-		return 0;
-	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-	for_each_possible_cpu(i) {
-		kfree(irq_cpu_data[i].irq_delta);
-		irq_cpu_data[i].irq_delta = NULL;
-		kfree(irq_cpu_data[i].last_irq);
-		irq_cpu_data[i].last_irq = NULL;
-	}
-	return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-	irqbalance_disabled = 1;
-	return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
-#endif /* CONFIG_SMP */
-
-#ifndef CONFIG_SMP
-void send_IPI_self(int vector)
-{
-	unsigned int cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP */
-
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-int skip_ioapic_setup;
-
-static int __init ioapic_pirq_setup(char *str)
-{
-	int i, max;
-	int ints[MAX_PIRQS+1];
-
-	get_options(str, ARRAY_SIZE(ints), ints);
-
-	for (i = 0; i < MAX_PIRQS; i++)
-		pirq_entries[i] = -1;
-
-	pirqs_enabled = 1;
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"PIRQ redirection, working around broken MP-BIOS.\n");
-	max = MAX_PIRQS;
-	if (ints[0] < MAX_PIRQS)
-		max = ints[0];
-
-	for (i = 0; i < max; i++) {
-		apic_printk(APIC_VERBOSE, KERN_DEBUG
-				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-		/*
-		 * PIRQs are mapped upside down, usually.
-		 */
-		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-	}
-	return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mp_dstirq == pin)
-			return i;
-
-	return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-
-			return mp_irqs[i].mp_dstirq;
-	}
-	return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-			break;
-	}
-	if (i < mp_irq_entries) {
-		int apic;
-		for (apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
-				return apic;
-		}
-	}
-
-	return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
-		"slot:%d, pin:%d.\n", bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mp_irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-			set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-	if (irq < 16) {
-		unsigned int port = 0x4d0 + (irq >> 3);
-		return (inb(port) >> (irq & 7)) & 1;
-	}
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"Broken MPtable reports ISA irq %d\n", irq);
-	return 0;
-}
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)	(0)
-#define default_ISA_polarity(idx)	(0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)	(1)
-#define default_PCI_polarity(idx)	(1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int polarity;
-
-	/*
-	 * Determine IRQ line polarity (high active or low active):
-	 */
-	switch (mp_irqs[idx].mp_irqflag & 3) {
-	case 0: /* conforms, ie. bus-type dependent polarity */
-	{
-		polarity = test_bit(bus, mp_bus_not_pci)?
-			default_ISA_polarity(idx):
-			default_PCI_polarity(idx);
-		break;
-	}
-	case 1: /* high active */
-	{
-		polarity = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
-	case 3: /* low active */
-	{
-		polarity = 1;
-		break;
-	}
-	default: /* invalid */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
-	}
-	return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int trigger;
-
-	/*
-	 * Determine IRQ trigger mode (edge or level sensitive):
-	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
-	case 0: /* conforms, ie. bus-type dependent */
-	{
-		trigger = test_bit(bus, mp_bus_not_pci)?
-				default_ISA_trigger(idx):
-				default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-		switch (mp_bus_id_to_type[bus]) {
-		case MP_BUS_ISA: /* ISA pin */
-		{
-			/* set before the switch */
-			break;
-		}
-		case MP_BUS_EISA: /* EISA pin */
-		{
-			trigger = default_EISA_trigger(idx);
-			break;
-		}
-		case MP_BUS_PCI: /* PCI pin */
-		{
-			/* set before the switch */
-			break;
-		}
-		case MP_BUS_MCA: /* MCA pin */
-		{
-			trigger = default_MCA_trigger(idx);
-			break;
-		}
-		default:
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
-			break;
-		}
-	}
-#endif
-		break;
-	}
-	case 1: /* edge */
-	{
-		trigger = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 1;
-		break;
-	}
-	case 3: /* level */
-	{
-		trigger = 1;
-		break;
-	}
-	default: /* invalid */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 0;
-		break;
-	}
-	}
-	return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-	return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-	return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
-	int irq, i;
-	int bus = mp_irqs[idx].mp_srcbus;
-
-	/*
-	 * Debugging check, we are in big trouble if this message pops up!
-	 */
-	if (mp_irqs[idx].mp_dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-	if (test_bit(bus, mp_bus_not_pci))
-		irq = mp_irqs[idx].mp_srcbusirq;
-	else {
-		/*
-		 * PCI IRQs are mapped in order
-		 */
-		i = irq = 0;
-		while (i < apic)
-			irq += nr_ioapic_registers[i++];
-		irq += pin;
-
-		/*
-		 * For MPS mode, so far only needed by ES7000 platform
-		 */
-		if (ioapic_renumber_irq)
-			irq = ioapic_renumber_irq(apic, irq);
-	}
-
-	/*
-	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
-	 */
-	if ((pin >= 16) && (pin <= 23)) {
-		if (pirq_entries[pin-16] != -1) {
-			if (!pirq_entries[pin-16]) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"disabling PIRQ%d\n", pin-16);
-			} else {
-				irq = pirq_entries[pin-16];
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"using PIRQ%d -> IRQ %d\n",
-						pin-16, irq);
-			}
-		}
-	}
-	return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	int apic, idx, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
-	}
-	/*
-	 * nonexistent IRQs are edge default
-	 */
-	return 0;
-}
-
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
-
-static int __assign_irq_vector(int irq)
-{
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
-	int vector, offset;
-
-	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
-
-	if (irq_vector[irq] > 0)
-		return irq_vector[irq];
-
-	vector = current_vector;
-	offset = current_offset;
-next:
-	vector += 8;
-	if (vector >= first_system_vector) {
-		offset = (offset + 1) % 8;
-		vector = FIRST_DEVICE_VECTOR + offset;
-	}
-	if (vector == current_vector)
-		return -ENOSPC;
-	if (test_and_set_bit(vector, used_vectors))
-		goto next;
-
-	current_vector = vector;
-	current_offset = offset;
-	irq_vector[irq] = vector;
-
-	return vector;
-}
-
-static int assign_irq_vector(int irq)
-{
-	unsigned long flags;
-	int vector;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	vector = __assign_irq_vector(irq);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	return vector;
-}
-
-static struct irq_chip ioapic_chip;
-
-#define IOAPIC_AUTO	-1
-#define IOAPIC_EDGE	0
-#define IOAPIC_LEVEL	1
-
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
-{
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL) {
-		irq_desc[irq].status |= IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_fasteoi_irq, "fasteoi");
-	} else {
-		irq_desc[irq].status &= ~IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_edge_irq, "edge");
-	}
-	set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
-
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry, 0, sizeof(entry));
-
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0;				/* enable IRQ */
-		entry.dest.logical.logical_dest =
-					cpu_mask_to_apicid(TARGET_CPUS);
-
-		idx = find_irq_entry(apic, pin, mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						" IO-APIC (apicid-pin) %d-%d",
-						mp_ioapics[apic].mp_apicid,
-						pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d",
-					mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
-
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-		}
-
-		irq = pin_2_irq(idx, apic, pin);
-		/*
-		 * skip adding the timer int on secondary nodes, which causes
-		 * a small but painful rift in the time-space continuum
-		 */
-		if (multi_timer_check(apic, irq))
-			continue;
-		else
-			add_pin_to_irq(irq, apic, pin);
-
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
-
-		if (IO_APIC_IRQ(irq)) {
-			vector = assign_irq_vector(irq);
-			entry.vector = vector;
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
-	}
-	}
-
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
-					int vector)
-{
-	struct IO_APIC_route_entry entry;
-
-	memset(&entry, 0, sizeof(entry));
-
-	/*
-	 * We use logical delivery to get the timer IRQ
-	 * to the first CPU.
-	 */
-	entry.dest_mode = INT_DEST_MODE;
-	entry.mask = 1;					/* mask IRQ now */
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.polarity = 0;
-	entry.trigger = 0;
-	entry.vector = vector;
-
-	/*
-	 * The timer IRQ doesn't have to know that behind the
-	 * scene we may have a 8259A-master in AEOI mode ...
-	 */
-	ioapic_register_intr(0, vector, IOAPIC_EDGE);
-
-	/*
-	 * Add it to the IO-APIC irq-routing table:
-	 */
-	ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-	int apic, i;
-	union IO_APIC_reg_00 reg_00;
-	union IO_APIC_reg_01 reg_01;
-	union IO_APIC_reg_02 reg_02;
-	union IO_APIC_reg_03 reg_03;
-	unsigned long flags;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
-	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
-	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
-	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
-	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
-	 * but the value of reg_02 is read as the previous read register
-	 * value, so ignore it if reg_02 == reg_01.
-	 */
-	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-	}
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
-	 * or reg_03, but the value of reg_0[23] is read as the previous read
-	 * register value, so ignore it if reg_03 == reg_0[12].
-	 */
-	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-	    reg_03.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-	}
-
-	printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
-			  " Stat Dest Deli Vect:   \n");
-
-	for (i = 0; i <= reg_01.bits.entries; i++) {
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		printk(KERN_DEBUG " %02x %03X %02X  ",
-			i,
-			entry.dest.logical.logical_dest,
-			entry.dest.physical.physical_dest
-		);
-
-		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-			entry.mask,
-			entry.trigger,
-			entry.irr,
-			entry.polarity,
-			entry.delivery_status,
-			entry.dest_mode,
-			entry.delivery_mode,
-			entry.vector
-		);
-	}
-	}
-	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
-			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
-		for (;;) {
-			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = irq_2_pin + entry->next;
-		}
-		printk("\n");
-	}
-
-	printk(KERN_INFO ".................................... done.\n");
-
-	return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-	unsigned int v;
-	int i, j;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int v, ver, maxlvt;
-	u64 icr;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(v));
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-	printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-	on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b, 0xa0);
-	outb(0x0b, 0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a, 0xa0);
-	outb(0x0a, 0x20);
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-	print_PIC();
-	print_all_local_APICs();
-	print_IO_APIC();
-
-	return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-static void __init enable_IO_APIC(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	int i8259_apic, i8259_pin;
-	int i, apic;
-	unsigned long flags;
-
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
-	if (!pirqs_enabled)
-		for (i = 0; i < MAX_PIRQS; i++)
-			pirq_entries[i] = -1;
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		int pin;
-		/* See if any of the pins is in ExtINT mode */
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-			entry = ioapic_read_entry(apic, pin);
-
-
-			/* If the interrupt line is enabled and in ExtInt mode
-			 * I have found the pin where the i8259 is connected.
-			 */
-			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-				ioapic_i8259.apic = apic;
-				ioapic_i8259.pin  = pin;
-				goto found_i8259;
-			}
-		}
-	}
- found_i8259:
-	/* Look to see what if the MP table has reported the ExtINT */
-	/* If we could not find the appropriate pin by looking at the ioapic
-	 * the i8259 probably is not connected the ioapic but give the
-	 * mptable a chance anyway.
-	 */
-	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-	/* Trust the MP table if nothing is setup in the hardware */
-	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-		ioapic_i8259.pin  = i8259_pin;
-		ioapic_i8259.apic = i8259_apic;
-	}
-	/* Complain if the MP table and the hardware disagree */
-	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-	{
-		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-	}
-
-	/*
-	 * Do not trust the IO-APIC being empty at bootup
-	 */
-	clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-	/*
-	 * Clear the IO-APIC before rebooting:
-	 */
-	clear_IO_APIC();
-
-	/*
-	 * If the i8259 is routed through an IOAPIC
-	 * Put that IOAPIC in virtual wire mode
-	 * so legacy interrupts can be delivered.
-	 */
-	if (ioapic_i8259.pin != -1) {
-		struct IO_APIC_route_entry entry;
-
-		memset(&entry, 0, sizeof(entry));
-		entry.mask            = 0; /* Enabled */
-		entry.trigger         = 0; /* Edge */
-		entry.irr             = 0;
-		entry.polarity        = 0; /* High */
-		entry.delivery_status = 0;
-		entry.dest_mode       = 0; /* Physical */
-		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-		entry.vector          = 0;
-		entry.dest.physical.physical_dest = read_apic_id();
-
-		/*
-		 * Add it to the IO-APIC irq-routing table:
-		 */
-		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-	}
-	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-	union IO_APIC_reg_00 reg_00;
-	physid_mask_t phys_id_present_map;
-	int apic;
-	int i;
-	unsigned char old_id;
-	unsigned long flags;
-
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-		return;
-
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
-	/*
-	 * This is broken; anything with a real cpu count has to
-	 * circumvent this idiocy regardless.
-	 */
-	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	/*
-	 * Set the IOAPIC ID to the value stored in the MPC table.
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-		/* Read the register 0 value */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		old_id = mp_ioapics[apic].mp_apicid;
-
-		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				reg_00.bits.ID);
-			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
-		}
-
-		/*
-		 * Sanity check, is the ID really free? Every APIC in a
-		 * system must have a unique ID or we get lots of nice
-		 * 'stuck on smp_invalidate_needed IPI wait' messages.
-		 */
-		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mp_apicid)) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			for (i = 0; i < get_physical_broadcast(); i++)
-				if (!physid_isset(i, phys_id_present_map))
-					break;
-			if (i >= get_physical_broadcast())
-				panic("Max APIC ID exceeded!\n");
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				i);
-			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mp_apicid = i;
-		} else {
-			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
-			apic_printk(APIC_VERBOSE, "Setting %d in the "
-					"phys_id_present_map\n",
-					mp_ioapics[apic].mp_apicid);
-			physids_or(phys_id_present_map, phys_id_present_map, tmp);
-		}
-
-
-		/*
-		 * We need to adjust the IRQ routing table
-		 * if the ID changed.
-		 */
-		if (old_id != mp_ioapics[apic].mp_apicid)
-			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mp_dstapic == old_id)
-					mp_irqs[i].mp_dstapic
-						= mp_ioapics[apic].mp_apicid;
-
-		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
-		 */
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mp_apicid);
-
-		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic, 0, reg_00.raw);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/*
-		 * Sanity check
-		 */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
-			printk("could not set ID!\n");
-		else
-			apic_printk(APIC_VERBOSE, " ok.\n");
-	}
-}
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *	- timer IRQ defaults to IO-APIC IRQ
- *	- if this function detects that timer IRQs are defunct, then we fall
- *	  back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-	unsigned long t1 = jiffies;
-	unsigned long flags;
-
-	if (no_timer_check)
-		return 1;
-
-	local_save_flags(flags);
-	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
-	local_irq_restore(flags);
-
-	/*
-	 * Expect a few ticks at least, to be sure some possible
-	 * glue logic does not lock up after one or two first
-	 * ticks in a non-ExtINT mode.  Also the local APIC
-	 * might have cached one ExtINT interrupt.  Finally, at
-	 * least one tick may be lost due to delays.
-	 */
-	if (time_after(jiffies, t1 + 4))
-		return 1;
-
-	return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Startup quirk:
- *
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- *
- * (We do this for level-triggered IRQs too - it cannot hurt.)
- */
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-	int was_pending = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
-		disable_8259A_irq(irq);
-		if (i8259A_irq_pending(irq))
-			was_pending = 1;
-	}
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return was_pending;
-}
-
-static void ack_ioapic_irq(unsigned int irq)
-{
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-static void ack_ioapic_quirk_irq(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
-	i = irq_vector[irq];
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-	send_IPI_self(irq_vector[irq]);
-
-	return 1;
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_ioapic_irq,
-	.eoi 		= ack_ioapic_quirk_irq,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-
-static inline void init_IO_APIC_traps(void)
-{
-	int irq;
-
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
-			/*
-			 * Hmm.. We don't have an entry for this,
-			 * so default to an old-fashioned 8259
-			 * interrupt if we can..
-			 */
-			if (irq < 16)
-				make_8259A_irq(irq);
-			else
-				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
-		}
-	}
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void ack_lapic_irq(unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
-static void mask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-	.name		= "local-APIC",
-	.mask		= mask_lapic_irq,
-	.unmask		= unmask_lapic_irq,
-	.ack		= ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq, int vector)
-{
-	irq_desc[irq].status &= ~IRQ_LEVEL;
-	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-				      "edge");
-	set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-	int apic, pin, i;
-	struct IO_APIC_route_entry entry0, entry1;
-	unsigned char save_control, save_freq_select;
-
-	pin  = find_isa_irq_pin(8, mp_INT);
-	if (pin == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-	apic = find_isa_irq_apic(8, mp_INT);
-	if (apic == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	entry0 = ioapic_read_entry(apic, pin);
-	clear_IO_APIC_pin(apic, pin);
-
-	memset(&entry1, 0, sizeof(entry1));
-
-	entry1.dest_mode = 0;			/* physical delivery */
-	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest.physical.physical_dest = hard_smp_processor_id();
-	entry1.delivery_mode = dest_ExtINT;
-	entry1.polarity = entry0.polarity;
-	entry1.trigger = 0;
-	entry1.vector = 0;
-
-	ioapic_write_entry(apic, pin, entry1);
-
-	save_control = CMOS_READ(RTC_CONTROL);
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-		   RTC_FREQ_SELECT);
-	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-	i = 100;
-	while (i-- > 0) {
-		mdelay(10);
-		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-			i -= 10;
-	}
-
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	clear_IO_APIC_pin(apic, pin);
-
-	ioapic_write_entry(apic, pin, entry0);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void __init check_timer(void)
-{
-	int apic1, pin1, apic2, pin2;
-	int no_pin1 = 0;
-	int vector;
-	unsigned int ver;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
-
-	/*
-	 * get/set the timer IRQ vector:
-	 */
-	disable_8259A_irq(0);
-	vector = assign_irq_vector(0);
-	set_intr_gate(vector, interrupt[0]);
-
-	/*
-	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.  Also
-	 * timer interrupts need to be acknowledged manually in
-	 * the 8259A for the i82489DX when using the NMI
-	 * watchdog as that APIC treats NMIs as level-triggered.
-	 * The AEOI mode will finish them in the 8259A
-	 * automatically.
-	 */
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-	init_8259A(1);
-	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-
-	pin1  = find_isa_irq_pin(0, mp_INT);
-	apic1 = find_isa_irq_apic(0, mp_INT);
-	pin2  = ioapic_i8259.pin;
-	apic2 = ioapic_i8259.apic;
-
-	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    vector, apic1, pin1, apic2, pin2);
-
-	/*
-	 * Some BIOS writers are clueless and report the ExtINTA
-	 * I/O APIC input from the cascaded 8259A as the timer
-	 * interrupt input.  So just in case, if only one pin
-	 * was found above, try it both directly and through the
-	 * 8259A.
-	 */
-	if (pin1 == -1) {
-		pin1 = pin2;
-		apic1 = apic2;
-		no_pin1 = 1;
-	} else if (pin2 == -1) {
-		pin2 = pin1;
-		apic2 = apic1;
-	}
-
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, vector);
-		}
-		unmask_IO_APIC_irq(0);
-		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			goto out;
-		}
-		clear_IO_APIC_pin(apic1, pin1);
-		if (!no_pin1)
-			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-				    "8254 timer not connected to IO-APIC\n");
-
-		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-			    "(IRQ0) through the 8259A ...\n");
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, vector);
-		unmask_IO_APIC_irq(0);
-		enable_8259A_irq(0);
-		if (timer_irq_works()) {
-			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			goto out;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		disable_8259A_irq(0);
-		clear_IO_APIC_pin(apic2, pin2);
-		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-	}
-
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-	timer_ack = 0;
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as Virtual Wire IRQ...\n");
-
-	lapic_register_intr(0, vector);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
-	enable_8259A_irq(0);
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as ExtINT IRQ...\n");
-
-	init_8259A(0);
-	make_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-	unlock_ExtINT_logic();
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-		"report.  Then try booting with the 'noapic' option.\n");
-out:
-	local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-	int i;
-
-	/* Reserve all the system vectors. */
-	for (i = first_system_vector; i < NR_VECTORS; i++)
-		set_bit(i, used_vectors);
-
-	enable_IO_APIC();
-
-	io_apic_irqs = ~PIC_IRQS;
-
-	printk("ENABLING IO-APIC IRQs\n");
-
-	/*
-	 * Set up IO-APIC IRQ routing.
-	 */
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
-	sync_Arb_IDs();
-	setup_IO_APIC_irqs();
-	init_IO_APIC_traps();
-	check_timer();
-}
-
-/*
- *	Called after all the initialization is done. If we didnt find any
- *	APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-	if (sis_apic_bug == -1)
-		sis_apic_bug = 0;
-	return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		entry[i] = ioapic_read_entry(dev->id, i);
-
-	return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	unsigned long flags;
-	union IO_APIC_reg_00 reg_00;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
-
-	return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
-	.suspend = ioapic_suspend,
-	.resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-	struct sys_device *dev;
-	int i, size, error = 0;
-
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
-
-	for (i = 0; i < nr_ioapics; i++) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-	}
-
-	return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-int create_irq(void)
-{
-	/* Allocate an unused irq */
-	int irq, new, vector = 0;
-	unsigned long flags;
-
-	irq = -ENOSPC;
-	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
-		if (platform_legacy_irq(new))
-			continue;
-		if (irq_vector[new] != 0)
-			continue;
-		vector = __assign_irq_vector(new);
-		if (likely(vector > 0))
-			irq = new;
-		break;
-	}
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (irq >= 0) {
-		set_intr_gate(vector, interrupt[irq]);
-		dynamic_irq_init(irq);
-	}
-	return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	dynamic_irq_cleanup(irq);
-
-	spin_lock_irqsave(&vector_lock, flags);
-	clear_bit(irq_vector[irq], used_vectors);
-	irq_vector[irq] = 0;
-	spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-	int vector;
-	unsigned dest;
-
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
-		dest = cpu_mask_to_apicid(TARGET_CPUS);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
-
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(vector);
-	}
-	return vector;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	int vector;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
-
-	vector = assign_irq_vector(irq);
-	if (vector < 0)
-		return;
-
-	dest = cpu_mask_to_apicid(mask);
-
-	read_msi_msg(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name		= "PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_ioapic_irq,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-	struct msi_msg msg;
-	int irq, ret;
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
-
-	ret = msi_compose_msg(dev, irq, &msg);
-	if (ret < 0) {
-		destroy_irq(irq);
-		return ret;
-	}
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
-				      "edge");
-
-	return 0;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-	destroy_irq(irq);
-}
-
-#endif /* CONFIG_PCI_MSI */
-
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	unsigned int dest;
-	cpumask_t tmp;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
-
-	cpus_and(mask, tmp, CPU_MASK_ALL);
-
-	dest = cpu_mask_to_apicid(mask);
-
-	target_ht_irq(irq, dest);
-	irq_desc[irq].affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
-	.name		= "PCI-HT",
-	.mask		= mask_ht_irq,
-	.unmask		= unmask_ht_irq,
-	.ack		= ack_ioapic_irq,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_ht_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	int vector;
-
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
-		struct ht_irq_msg msg;
-		unsigned dest;
-		cpumask_t tmp;
-
-		cpus_clear(tmp);
-		cpu_set(vector >> 8, tmp);
-		dest = cpu_mask_to_apicid(tmp);
-
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(vector) |
-			((INT_DEST_MODE == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
-
-		write_ht_irq_msg(irq, &msg);
-
-		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
-	}
-	return vector;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
-			ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-	union IO_APIC_reg_00 reg_00;
-	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-	physid_mask_t tmp;
-	unsigned long flags;
-	int i = 0;
-
-	/*
-	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
-	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
-	 * supports up to 16 on one shared APIC bus.
-	 *
-	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-	 *      advantage of new APIC bus architecture.
-	 */
-
-	if (physids_empty(apic_id_map))
-		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	if (apic_id >= get_physical_broadcast()) {
-		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-			"%d\n", ioapic, apic_id, reg_00.bits.ID);
-		apic_id = reg_00.bits.ID;
-	}
-
-	/*
-	 * Every APIC in a system must have a unique ID or we get lots of nice
-	 * 'stuck on smp_invalidate_needed IPI wait' messages.
-	 */
-	if (check_apicid_used(apic_id_map, apic_id)) {
-
-		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!check_apicid_used(apic_id_map, i))
-				break;
-		}
-
-		if (i == get_physical_broadcast())
-			panic("Max apic_id exceeded!\n");
-
-		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-			"trying %d\n", ioapic, apic_id, i);
-
-		apic_id = i;
-	}
-
-	tmp = apicid_to_cpu_present(apic_id);
-	physids_or(apic_id_map, apic_id_map, tmp);
-
-	if (reg_00.bits.ID != apic_id) {
-		reg_00.bits.ID = apic_id;
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/* Sanity check */
-		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-			return -1;
-		}
-	}
-
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-	return apic_id;
-}
-
-
-int __init io_apic_get_version(int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.version;
-}
-
-
-int __init io_apic_get_redir_entries(int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
-	struct IO_APIC_route_entry entry;
-
-	if (!IO_APIC_IRQ(irq)) {
-		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	/*
-	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
-	 * Note that we mask (disable) IRQs now -- these get enabled when the
-	 * corresponding device driver registers for this IRQ.
-	 */
-
-	memset(&entry, 0, sizeof(entry));
-
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.trigger = edge_level;
-	entry.polarity = active_high_low;
-	entry.mask  = 1;
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
-
-	entry.vector = assign_irq_vector(irq);
-
-	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
-		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-		mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
-		edge_level, active_high_low);
-
-	ioapic_register_intr(irq, entry.vector, edge_level);
-
-	if (!ioapic && (irq < 16))
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(ioapic, pin, entry);
-
-	return 0;
-}
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-	int i;
-
-	if (skip_ioapic_setup)
-		return -1;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == mp_INT &&
-		    mp_irqs[i].mp_srcbusirq == bus_irq)
-			break;
-	if (i >= mp_irq_entries)
-		return -1;
-
-	*trigger = irq_trigger(i);
-	*polarity = irq_polarity(i);
-	return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-static int __init parse_disable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
-
-static int __init parse_enable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = -1;
-	return 0;
-}
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
-
-static int __init parse_noapic(char *arg)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-void __init ioapic_init_mappings(void)
-{
-	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
-
-	for (i = 0; i < nr_ioapics; i++) {
-		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-			if (!ioapic_phys) {
-				printk(KERN_ERR
-				       "WARNING: bogus zero IO-APIC "
-				       "address found in MPTABLE, "
-				       "disabling IO/APIC support!\n");
-				smp_found_config = 0;
-				skip_ioapic_setup = 1;
-				goto fake_ioapic_page;
-			}
-		} else {
-fake_ioapic_page:
-			ioapic_phys = (unsigned long)
-				      alloc_bootmem_pages(PAGE_SIZE);
-			ioapic_phys = __pa(ioapic_phys);
-		}
-		set_fixmap_nocache(idx, ioapic_phys);
-		printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-		       __fix_to_virt(idx), ioapic_phys);
-		idx++;
-	}
-}
-
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 00000000000..ccf6c503fc3
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,189 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+
+atomic_t irq_err_count;
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 * But only ack when the APIC is enabled -AK
+	 */
+	if (cpu_has_apic)
+		ack_APIC_irq();
+#endif
+}
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x)		(&per_cpu(irq_stat,x))
+#else
+# define irq_stats(x)		cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p);
+
+	/* print header */
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%-8d",j);
+		seq_putc(p, '\n');
+	}
+
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+	any_count = kstat_irqs(i);
+#else
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+#endif
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+	seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
+	}
+
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
+/*
+ * /proc/stat helpers: sums of the counters that show_other_interrupts() prints
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	sum += irq_stats(cpu)->irq_resched_count;
+	sum += irq_stats(cpu)->irq_call_count;
+	sum += irq_stats(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+	sum += irq_stats(cpu)->irq_threshold_count;
+# endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_MCE */
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+	sum += atomic_read(&irq_mis_count);
+#endif
+	return sum;
+}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4..a51382672de 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But only ack when the APIC is enabled -AK
-	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
 static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
-	int overflow, irq = ~regs->orig_ax;
-	struct irq_desc *desc = irq_desc + irq;
+	int overflow;
+	unsigned vector = ~regs->orig_ax;
+	struct irq_desc *desc;
+	unsigned irq;
 
-	if (unlikely((unsigned)irq >= NR_IRQS)) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-					__func__, irq);
-		BUG();
-	}
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
+	irq = __get_cpu_var(vector_irq)[vector];
 
 	overflow = check_stack_overflow();
 
+	desc = irq_to_desc(irq);
+	if (unlikely(!desc)) {
+		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
+					__func__, irq, vector, smp_processor_id());
+		BUG();
+	}
+
 	if (!execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	return 1;
 }
 
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
-#else
-		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
-#endif
-		action = irq_desc[i].action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
-
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
-
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", nmi_count(j));
-		seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-#endif
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	}
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = nmi_count(cpu);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
-#endif
-#ifdef CONFIG_SMP
-	sum += per_cpu(irq_stat, cpu).irq_resched_count;
-	sum += per_cpu(irq_stat, cpu).irq_call_count;
-	sum += per_cpu(irq_stat, cpu).irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += per_cpu(irq_stat, cpu).irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).irq_spurious_count;
-#endif
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
-	sum += atomic_read(&irq_mis_count);
-#endif
-	return sum;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
+
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		cpus_and(mask, desc->affinity, map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
 			mask = map;
 		}
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
-		else if (irq_desc[irq].action && !(warned++))
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
+		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b..60eb84eb77a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,28 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-atomic_t irq_err_count;
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But don't ack when the APIC is disabled. -AK
-	 */
-	if (!disable_apic)
-		ack_APIC_irq();
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:
@@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 #endif
 
 /*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
-#else
-		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
-#endif
-		action = irq_desc[i].action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
-
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-		seq_printf(p, "  Non-maskable interrupts\n");
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-		seq_printf(p, "THR: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-		seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-	}
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = cpu_pda(cpu)->__nmi_count;
-
-	sum += cpu_pda(cpu)->apic_timer_irqs;
-#ifdef CONFIG_SMP
-	sum += cpu_pda(cpu)->irq_resched_count;
-	sum += cpu_pda(cpu)->irq_call_count;
-	sum += cpu_pda(cpu)->irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += cpu_pda(cpu)->irq_thermal_count;
-	sum += cpu_pda(cpu)->irq_threshold_count;
-#endif
-	sum += cpu_pda(cpu)->irq_spurious_count;
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	return atomic_read(&irq_err_count);
-}
-
-/*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
  * handlers).
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
 
 	/* high bit used in ret_from_ code  */
 	unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(irq < NR_IRQS))
-		generic_handle_irq(irq);
+	desc = irq_to_desc(irq);
+	if (likely(desc))
+		generic_handle_irq_desc(irq, desc);
 	else {
 		if (!disable_apic)
 			ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
 			continue;
 
 		/* interrupt's are disabled at this point */
-		spin_lock(&irq_desc[irq].lock);
+		spin_lock(&desc->lock);
 
 		if (!irq_has_action(irq) ||
-		    cpus_equal(irq_desc[irq].affinity, map)) {
-			spin_unlock(&irq_desc[irq].lock);
+		    cpus_equal(desc->affinity, map)) {
+			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		cpus_and(mask, desc->affinity, map);
 		if (cpus_empty(mask)) {
 			break_affinity = 1;
 			mask = map;
 		}
 
-		if (irq_desc[irq].chip->mask)
-			irq_desc[irq].chip->mask(irq);
+		if (desc->chip->mask)
+			desc->chip->mask(irq);
 
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (irq_desc[irq].chip->unmask)
-			irq_desc[irq].chip->unmask(irq);
+		if (desc->chip->unmask)
+			desc->chip->unmask(irq);
 
-		spin_unlock(&irq_desc[irq].lock);
+		spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
 			printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752..845aa9803e8 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
+		/* first time this irq_desc is used */
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
 		set_irq_chip_and_handler_name(i, &i8259A_chip,
 					      handle_level_irq, "XT");
 	}
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
 	.name = "cascade",
 };
 
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+	[0 ... IRQ0_VECTOR - 1] = -1,
+	[IRQ0_VECTOR] = 0,
+	[IRQ1_VECTOR] = 1,
+	[IRQ2_VECTOR] = 2,
+	[IRQ3_VECTOR] = 3,
+	[IRQ4_VECTOR] = 4,
+	[IRQ5_VECTOR] = 5,
+	[IRQ6_VECTOR] = 6,
+	[IRQ7_VECTOR] = 7,
+	[IRQ8_VECTOR] = 8,
+	[IRQ9_VECTOR] = 9,
+	[IRQ10_VECTOR] = 10,
+	[IRQ11_VECTOR] = 11,
+	[IRQ12_VECTOR] = 12,
+	[IRQ13_VECTOR] = 13,
+	[IRQ14_VECTOR] = 14,
+	[IRQ15_VECTOR] = 15,
+	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (i >= NR_IRQS)
-			break;
+	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (!test_bit(vector, used_vectors))
-			set_intr_gate(vector, interrupt[i]);
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i]);
 	}
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
-	/*
-	 * IRQ0 must be given a fixed assignment and initialized,
-	 * because it's used before the IO-APIC is set up.
-	 */
-	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2..ff023539128 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = NULL;
-		irq_desc[i].depth = 1;
-
-		if (i < 16) {
-			/*
-			 * 16 old-style INTA-cycle interrupts:
-			 */
-			set_irq_chip_and_handler_name(i, &i8259A_chip,
+	for (i = 0; i < 16; i++) {
+		/* first time this irq_desc is used */
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
+		/*
+		 * 16 old-style INTA-cycle interrupts:
+		 */
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
 						      handle_level_irq, "XT");
-		} else {
-			/*
-			 * 'high' PCI IRQs filled in on demand
-			 */
-			irq_desc[i].chip = &no_irq_chip;
-		}
 	}
 }
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f9..67465ed8931 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
 			"disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-		irqbalance_disable("");
-#endif
 		noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
 		no_irq_affinity = 1;
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0a23b5795b2..dd6f2b71561 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
 	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(cmos_minutes);
+		cmos_minutes = bcd2bin(cmos_minutes);
 
 	/*
 	 * since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
 		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+			real_seconds = bin2bcd(real_seconds);
+			real_minutes = bin2bcd(real_minutes);
 		}
 		CMOS_WRITE(real_seconds,RTC_SECONDS);
 		CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
 	WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
 
 	if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hour);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mon);
-		BCD_TO_BIN(year);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
 	}
 
 	if (century) {
-		BCD_TO_BIN(century);
+		century = bcd2bin(century);
 		year += century * 100;
 		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
 	} else
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4..0fa6790c1dd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
 
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel. This option will be passed
  * by kexec loader to the capture kernel.
@@ -1067,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	prefill_possible_map();
+
 #ifdef CONFIG_X86_64
 	init_cpu_to_node();
 #endif
@@ -1074,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	/* nr_irqs can only be probed once the io_apic has been mapped */
+	nr_irqs = probe_nr_irqs();
+
 	kvm_guest_init();
 
 	e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d931..410c88f0bfe 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size = PERCPU_ENOUGH_ROOM;
+	ssize_t size, old_size;
 	char *ptr;
 	int cpu;
+	unsigned long align = 1;
 
 	/* Setup cpu_pda map */
 	setup_cpu_pda_map();
 
 	/* Copy section for each CPU (we discard the original) */
-	size = PERCPU_ENOUGH_ROOM;
+	old_size = PERCPU_ENOUGH_ROOM;
+	align = max_t(unsigned long, PAGE_SIZE, align);
+	size = roundup(old_size, align);
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-		ptr = alloc_bootmem_pages(size);
+		ptr = __alloc_bootmem(size, align,
+				 __pa(MAX_DMA_ADDRESS));
 #else
 		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
-			ptr = alloc_bootmem_pages(size);
+			ptr = __alloc_bootmem(size, align,
+					 __pa(MAX_DMA_ADDRESS));
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
 					 cpu, __pa(ptr));
 		}
 		else {
-			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
+							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
 				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
 					 cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
 	}
 
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ed9e070a6e..7ece815ea63 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
 	int timeout;
 	u32 status;
 
-	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
+		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
 	start_ip = setup_trampoline();
 
 	/* So we see what's up   */
-	printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+	printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
 			  cpu, apicid, start_ip);
 
 	/*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 00000000000..aeef529917e
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,79 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/irq.h>
+
+#include <asm/apic.h>
+#include <asm/uv/uv_irq.h>
+
+static void uv_noop(unsigned int irq)
+{
+}
+
+static unsigned int uv_noop_ret(unsigned int irq)
+{
+	return 0;
+}
+
+static void uv_ack_apic(unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
+struct irq_chip uv_irq_chip = {
+	.name		= "UV-CORE",
+	.startup	= uv_noop_ret,
+	.shutdown	= uv_noop,
+	.enable		= uv_noop,
+	.disable	= uv_noop,
+	.ack		= uv_noop,
+	.mask		= uv_noop,
+	.unmask		= uv_noop,
+	.eoi		= uv_ack_apic,
+	.end		= uv_noop,
+};
+
+/*
+ * Set up a mapping of an available irq and vector, and enable the specified
+ * MMR that defines the MSI that is to be sent to the specified CPU when an
+ * interrupt is raised.
+ */
+int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
+		 unsigned long mmr_offset)
+{
+	int irq;
+	int ret;
+
+	irq = create_irq();
+	if (irq <= 0)
+		return -EBUSY;
+
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
+	if (ret != irq)
+		destroy_irq(irq);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uv_setup_irq);
+
+/*
+ * Tear down a mapping of an irq and vector, and disable the specified MMR that
+ * defined the MSI that was to be sent to the specified CPU when an interrupt
+ * was raised.
+ *
+ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
+ */
+void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+{
+	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 00000000000..67f9b9dbf80
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
+ */
+
+#include <linux/sysdev.h>
+#include <asm/uv/bios.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+	__ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+	__ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+/* Set up /sys/firmware/sgi_uv with its partition_id and coherence_id files. */
+static int __init sgi_uv_sysfs_init(void)
+{
+	int ret;	/* sysfs_create_file() returns int, as does this initcall */
+
+	if (!sgi_uv_kobj)
+		sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+	if (!sgi_uv_kobj) {
+		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
+		return -EINVAL;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
+		return ret;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
+		return ret;
+	}
+	return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f7..0c9667f0752 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
 static unsigned int startup_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+	if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+		desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
 	enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 	return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
 static void end_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+	if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
 		enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 }
@@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
-	desc = irq_desc + realirq;
+	desc = irq_to_desc(realirq);
 
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_cpu(smp_processor_id()).irqs[realirq]++;
+	kstat_incr_irqs_this_cpu(realirq, desc);
 
 	if (likely(desc->action != NULL))
 		handle_IRQ_event(realirq, desc->action);
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
 	int i;
 
 	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = 0;
-		irq_desc[i].depth = 1;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = 0;
+		desc->depth = 1;
 
 		if (i == 0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE1) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_8259) {
-			irq_desc[i].chip = &piix4_master_irq_type;
+			desc->chip = &piix4_master_irq_type;
 		}
 		else if (i < CO_IRQ_APIC0) {
-			irq_desc[i].chip = &piix4_virtual_irq_type;
+			desc->chip = &piix4_virtual_irq_type;
 		}
 		else if (IS_CO_APIC(i)) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 	}
 
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe28..254ee07f863 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
 
 void __init vmi_time_init(void)
 {
+	unsigned int cpu;
 	/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
 	outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
 
 	vmi_time_init_clockevent();
 	setup_irq(0, &vmi_clock_action);
+	for_each_possible_cpu(cpu)
+		per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 65f0b8a47be..48ee4f9435f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
 	for (i = 0; i < LGUEST_IRQS; i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
 		if (vector != SYSCALL_VECTOR) {
-			set_intr_gate(vector, interrupt[i]);
+			set_intr_gate(vector, interrupt[vector]);
 			set_irq_chip_and_handler_name(i, &lguest_irq_controller,
 						      handle_level_irq,
 						      "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index df37fc9d6a2..3c3b471ea49 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	 { }
 };
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	return cpumask_of_cpu(cpu);	/* bigsmp: the domain is just @cpu itself */
+}
 
 static int probe_bigsmp(void)
 {
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 6513d41ea21..28459cab3dd 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt destination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;	/* same mask regardless of @cpu */
+}
+
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8cf58394975..71a309b122e 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt destination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;	/* same mask regardless of @cpu */
+}
+
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6ad6b67a723..6272b5e69da 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -23,4 +23,18 @@ static int probe_summit(void)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt destination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;	/* same mask regardless of @cpu */
+}
+
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873..0f6e8a6523a 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
  * the interrupt off to another CPU */
 static void before_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 	__u8 cpu = smp_processor_id();
 
 	_raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
 /* Finish the VIC interrupt: basically mask */
 static void after_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 
 	_raw_spin_lock(&vic_irq_lock);
 	{
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 635b50e8558..2c4baa88f2c 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -56,13 +56,6 @@ struct remap_trace {
 static DEFINE_PER_CPU(struct trap_reason, pf_reason);
 static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
 
-#if 0 /* XXX: no way gather this info anymore */
-/* Access to this is not per-cpu. */
-static DEFINE_PER_CPU(atomic_t, dropped);
-#endif
-
-static struct dentry *marker_file;
-
 static DEFINE_MUTEX(mmiotrace_mutex);
 static DEFINE_SPINLOCK(trace_lock);
 static atomic_t mmiotrace_enabled;
@@ -75,7 +68,7 @@ static LIST_HEAD(trace_list);		/* struct remap_trace */
  *   and trace_lock.
  * - Routines depending on is_enabled() must take trace_lock.
  * - trace_list users must hold trace_lock.
- * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
  * - pre/post callbacks assume the effect of is_enabled() being true.
  */
 
@@ -97,44 +90,6 @@ static bool is_enabled(void)
 	return atomic_read(&mmiotrace_enabled);
 }
 
-#if 0 /* XXX: needs rewrite */
-/*
- * Write callback for the debugfs entry:
- * Read a marker and write it to the mmio trace log
- */
-static ssize_t write_marker(struct file *file, const char __user *buffer,
-						size_t count, loff_t *ppos)
-{
-	char *event = NULL;
-	struct mm_io_header *headp;
-	ssize_t len = (count > 65535) ? 65535 : count;
-
-	event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
-	if (!event)
-		return -ENOMEM;
-
-	headp = (struct mm_io_header *)event;
-	headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
-	headp->data_len = len;
-
-	if (copy_from_user(event + sizeof(*headp), buffer, len)) {
-		kfree(event);
-		return -EFAULT;
-	}
-
-	spin_lock_irq(&trace_lock);
-#if 0 /* XXX: convert this to use tracing */
-	if (is_enabled())
-		relay_write(chan, event, sizeof(*headp) + len);
-	else
-#endif
-		len = -EINVAL;
-	spin_unlock_irq(&trace_lock);
-	kfree(event);
-	return len;
-}
-#endif
-
 static void print_pte(unsigned long address)
 {
 	unsigned int level;
@@ -307,8 +262,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
 	map.map_id = trace->id;
 
 	spin_lock_irq(&trace_lock);
-	if (!is_enabled())
+	if (!is_enabled()) {
+		kfree(trace);
 		goto not_enabled;
+	}
 
 	mmio_trace_mapping(&map);
 	list_add_tail(&trace->list, &trace_list);
@@ -377,6 +334,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
 		iounmap_trace_core(addr);
 }
 
+int mmiotrace_printk(const char *fmt, ...)
+{
+	int ret = 0;	/* stays 0 when tracing is not enabled */
+	va_list args;
+	unsigned long flags;
+	va_start(args, fmt);
+
+	spin_lock_irqsave(&trace_lock, flags);	/* is_enabled() needs trace_lock */
+	if (is_enabled())
+		ret = mmio_trace_printk(fmt, args);	/* result is passed back as-is */
+	spin_unlock_irqrestore(&trace_lock, flags);
+
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
 static void clear_trace_list(void)
 {
 	struct remap_trace *trace;
@@ -462,26 +436,12 @@ static void leave_uniprocessor(void)
 }
 #endif
 
-#if 0 /* XXX: out of order */
-static struct file_operations fops_marker = {
-	.owner =	THIS_MODULE,
-	.write =	write_marker
-};
-#endif
-
 void enable_mmiotrace(void)
 {
 	mutex_lock(&mmiotrace_mutex);
 	if (is_enabled())
 		goto out;
 
-#if 0 /* XXX: tracing does not support text entries */
-	marker_file = debugfs_create_file("marker", 0660, dir, NULL,
-								&fops_marker);
-	if (!marker_file)
-		pr_err(NAME "marker file creation failed.\n");
-#endif
-
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
 	enter_uniprocessor();
@@ -506,11 +466,6 @@ void disable_mmiotrace(void)
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
-	if (marker_file) {
-		debugfs_remove(marker_file);
-		marker_file = NULL;
-	}
-
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a9ec89c3fbc..407d8784f66 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -792,6 +792,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	/* Must avoid aliasing mappings in the highmem code */
 	kmap_flush_unused();
 
+	vm_unmap_aliases();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index efa1911e20c..df3d5c861cd 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
 static unsigned int mw64[] = { 0x89, 0x8B };
 #endif /* not __i386__ */
 
-static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
-								int *rexr)
+struct prefix_bits {
+	unsigned shorted:1;	/* a 0x66 prefix byte was skipped */
+	unsigned enlarged:1;	/* amd64: skipped a prefix with (byte & 0xf8) == 0x48 */
+	unsigned rexr:1;	/* amd64: skipped a prefix with (byte & 0xf4) == 0x44 */
+	unsigned rex:1;		/* amd64: skipped a prefix with (byte & 0xf0) == 0x40 */
+};
+
+static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
 {
 	int i;
 	unsigned char *p = addr;
-	*shorted = 0;
-	*enlarged = 0;
-	*rexr = 0;
+	prf->shorted = 0;
+	prf->enlarged = 0;
+	prf->rexr = 0;
+	prf->rex = 0;
 
 restart:
 	for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
 		if (*p == prefix_codes[i]) {
 			if (*p == 0x66)
-				*shorted = 1;
+				prf->shorted = 1;
 #ifdef __amd64__
 			if ((*p & 0xf8) == 0x48)
-				*enlarged = 1;
+				prf->enlarged = 1;
 			if ((*p & 0xf4) == 0x44)
-				*rexr = 1;
+				prf->rexr = 1;
+			if ((*p & 0xf0) == 0x40)
+				prf->rex = 1;
 #endif
 			p++;
 			goto restart;
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int shorted, enlarged, rexr;
+	struct prefix_bits prf;
 	int i;
 	enum reason_type rv = OTHERS;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(rw32); i++)
 		if (rw32[i] == opcode)
-			return (shorted ? 2 : (enlarged ? 8 : 4));
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(mw32); i++)
 		if (mw32[i] == opcode)
-			return shorted ? 2 : 4;
+			return prf.shorted ? 2 : 4;
 
 	for (i = 0; i < ARRAY_SIZE(mw64); i++)
 		if (mw64[i] == opcode)
-			return shorted ? 2 : (enlarged ? 8 : 4);
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -238,7 +249,7 @@ enum {
 #endif
 };
 
-static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
 {
 	unsigned char *rv = NULL;
 
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 	case arg_DL:
 		rv = (unsigned char *)&regs->dx;
 		break;
-	case arg_AH:
-		rv = 1 + (unsigned char *)&regs->ax;
-		break;
-	case arg_BH:
-		rv = 1 + (unsigned char *)&regs->bx;
-		break;
-	case arg_CH:
-		rv = 1 + (unsigned char *)&regs->cx;
-		break;
-	case arg_DH:
-		rv = 1 + (unsigned char *)&regs->dx;
-		break;
 #ifdef __amd64__
 	case arg_R8:
 		rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 		break;
 #endif
 	default:
-		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
 		break;
 	}
+
+	if (rv)
+		return rv;
+
+	if (rex) {
+		/*
+		 * If REX prefix exists, access low bytes of SI etc.
+		 * instead of AH etc.
+		 */
+		switch (no) {
+		case arg_SI:
+			rv = (unsigned char *)&regs->si;
+			break;
+		case arg_DI:
+			rv = (unsigned char *)&regs->di;
+			break;
+		case arg_BP:
+			rv = (unsigned char *)&regs->bp;
+			break;
+		case arg_SP:
+			rv = (unsigned char *)&regs->sp;
+			break;
+		default:
+			break;
+		}
+	} else {
+		switch (no) {
+		case arg_AH:
+			rv = 1 + (unsigned char *)&regs->ax;
+			break;
+		case arg_BH:
+			rv = 1 + (unsigned char *)&regs->bx;
+			break;
+		case arg_CH:
+			rv = 1 + (unsigned char *)&regs->cx;
+			break;
+		case arg_DH:
+			rv = 1 + (unsigned char *)&regs->dx;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (!rv)
+		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+
 	return rv;
 }
 
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 	unsigned char mod_rm;
 	int reg;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
 		if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 
 do_work:
 	mod_rm = *p;
-	reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+	reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
 	switch (get_ins_reg_width(ins_addr)) {
 	case 1:
-		return *get_reg_w8(reg, regs);
+		return *get_reg_w8(reg, prf.rex, regs);
 
 	case 2:
 		return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
 	unsigned char mod_rm;
 	unsigned char mod;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
 		if (imm_wop[i] == opcode) {
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index d877c5b423e..ab50a8d7402 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -3,6 +3,7 @@
  */
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/mmiotrace.h>
 
 #define MODULE_NAME "testmmiotrace"
 
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
 static void do_write_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Write test.\n");
 	for (i = 0; i < 256; i++)
 		iowrite8(i, p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
 static void do_read_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Read test.\n");
 	for (i = 0; i < 256; i++)
 		ioread8(p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
 		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
 		return;
 	}
+	mmiotrace_printk("ioremap returned %p.\n", p);
 	do_write_test(p);
 	do_read_test(p);
 	iounmap(p);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 006599db0dc..bf69dbe08bf 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 	if (pirq <= 4)
 		irq = read_config_nybble(router, 0x56, pirq - 1);
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	return irq;
 }
@@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	if (pirq <= 4)
 		write_config_nybble(router, 0x56, pirq - 1, irq);
@@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
 	case PCI_DEVICE_ID_INTEL_ICH10_1:
 	case PCI_DEVICE_ID_INTEL_ICH10_2:
 	case PCI_DEVICE_ID_INTEL_ICH10_3:
-	case PCI_DEVICE_ID_INTEL_PCH_0:
-	case PCI_DEVICE_ID_INTEL_PCH_1:
 		r->name = "PIIX/ICH";
 		r->get = pirq_piix_get;
 		r->set = pirq_piix_set;
 		return 1;
 	}
+
+	if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && 
+		(device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+		r->name = "PIIX/ICH";
+		r->get = pirq_piix_get;
+		r->set = pirq_piix_set;
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -823,7 +830,7 @@ static void __init pirq_find_router(struct irq_router *r)
 	r->get = NULL;
 	r->set = NULL;
 
-	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
+	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
 	    rt->rtr_vendor, rt->rtr_device);
 
 	pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
@@ -843,7 +850,7 @@ static void __init pirq_find_router(struct irq_router *r)
 			h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
-	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
+	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
 		 pirq_router.name,
 		 pirq_router_dev->vendor, pirq_router_dev->device);
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0013a729b41..b61534c7a4c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -871,6 +871,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
 			/* make sure there are no stray mappings of
 			   this page */
 			kmap_flush_unused();
+			vm_unmap_aliases();
 	}
 }
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 28b85ab8422..bb042608c60 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
 
 static void __init __xen_init_IRQ(void)
 {
-#ifdef CONFIG_X86_64
 	int i;
 
 	/* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
 		for_each_possible_cpu(cpu)
 			per_cpu(vector_irq, cpu)[i] = i;
 	}
-#endif	/* CONFIG_X86_64 */
 
 	xen_init_IRQ();
 }
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ae173f6edd8..d4d52f5a1cf 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -846,6 +846,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
+		vm_unmap_aliases();
 		xen_mc_batch();
 	}
 
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index dd71e3a021c..5601506f2dd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 
 out:
 	raw_local_irq_restore(flags);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 02e417d3d8e..a213260b51e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -55,6 +55,7 @@ config HZ
 	default 100
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index e8362c1efa3..dcbf1be149f 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -115,34 +115,32 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
  *	(start) dependent operations on their target channel
  * @tx: transaction with dependencies
  */
-void
-async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
+void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
 {
-	struct dma_async_tx_descriptor *next = tx->next;
+	struct dma_async_tx_descriptor *dep = tx->next;
+	struct dma_async_tx_descriptor *dep_next;
 	struct dma_chan *chan;
 
-	if (!next)
+	if (!dep)
 		return;
 
-	tx->next = NULL;
-	chan = next->chan;
+	chan = dep->chan;
 
 	/* keep submitting up until a channel switch is detected
 	 * in that case we will be called again as a result of
 	 * processing the interrupt from async_tx_channel_switch
 	 */
-	while (next && next->chan == chan) {
-		struct dma_async_tx_descriptor *_next;
-
-		spin_lock_bh(&next->lock);
-		next->parent = NULL;
-		_next = next->next;
-		if (_next && _next->chan == chan)
-			next->next = NULL;
-		spin_unlock_bh(&next->lock);
-
-		next->tx_submit(next);
-		next = _next;
+	for (; dep; dep = dep_next) {
+		spin_lock_bh(&dep->lock);
+		dep->parent = NULL;
+		dep_next = dep->next;
+		if (dep_next && dep_next->chan == chan)
+			dep->next = NULL; /* ->next will be submitted */
+		else
+			dep_next = NULL; /* submit current dep and terminate */
+		spin_unlock_bh(&dep->lock);
+
+		dep->tx_submit(dep);
 	}
 
 	chan->device->device_issue_pending(chan);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b1c723f9f58..70f7f60929c 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -431,7 +431,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.attr = {.name = "alarm", .mode = 0644},
 	.show = acpi_battery_alarm_show,
 	.store = acpi_battery_alarm_store,
 };
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 10a36512647..7b011e7e29f 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -463,7 +463,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.attr = {.name = "alarm", .mode = 0644},
 	.show = acpi_battery_alarm_show,
 	.store = acpi_battery_alarm_store,
 };
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index bf5b04de02d..631ee2ee2ca 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c
@@ -120,13 +120,13 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(sec);
-		BCD_TO_BIN(min);
-		BCD_TO_BIN(hr);
-		BCD_TO_BIN(day);
-		BCD_TO_BIN(mo);
-		BCD_TO_BIN(yr);
-		BCD_TO_BIN(cent);
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hr = bcd2bin(hr);
+		day = bcd2bin(day);
+		mo = bcd2bin(mo);
+		yr = bcd2bin(yr);
+		cent = bcd2bin(cent);
 	}
 
 	/* we're trusting the FADT (see above) */
@@ -204,7 +204,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 {
 	u32 val = CMOS_READ(offset);
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(val);
+		val = bcd2bin(val);
 	return val;
 }
 
@@ -212,7 +212,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 static void cmos_bcd_write(u32 val, int offset, int rtc_control)
 {
 	if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BIN_TO_BCD(val);
+		val = bin2bcd(val);
 	CMOS_WRITE(val, offset);
 }
 
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 91dec448b3e..24e80fd927e 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -115,7 +115,6 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
 	table_attr->attr.read = acpi_table_show;
 	table_attr->attr.attr.name = table_attr->name;
 	table_attr->attr.attr.mode = 0444;
-	table_attr->attr.attr.owner = THIS_MODULE;
 
 	return;
 }
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1ee9499bd34..bbb3cae5749 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5373,6 +5373,8 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 
 #ifdef CONFIG_ATA_SFF
 	INIT_DELAYED_WORK(&ap->port_task, ata_pio_task);
+#else
+	INIT_DELAYED_WORK(&ap->port_task, NULL);
 #endif
 	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index a93247cc395..5d687d7cffa 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1206,7 +1206,10 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
 
 	ata_eh_clear_action(link, dev, ehi, action);
 
-	if (!(ehc->i.flags & ATA_EHI_QUIET))
+	/* About to take EH action, set RECOVERED.  Ignore actions on
+	 * slave links as master will do them again.
+	 */
+	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
 		ap->pflags |= ATA_PFLAG_RECOVERED;
 
 	spin_unlock_irqrestore(ap->lock, flags);
@@ -2010,8 +2013,13 @@ void ata_eh_autopsy(struct ata_port *ap)
 		struct ata_eh_context *mehc = &ap->link.eh_context;
 		struct ata_eh_context *sehc = &ap->slave_link->eh_context;
 
+		/* transfer control flags from master to slave */
+		sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;
+
+		/* perform autopsy on the slave link */
 		ata_eh_link_autopsy(ap->slave_link);
 
+		/* transfer actions from slave to master and clear slave */
 		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
 		mehc->i.action		|= sehc->i.action;
 		mehc->i.dev_action[1]	|= sehc->i.dev_action[1];
@@ -2447,14 +2455,14 @@ int ata_eh_reset(struct ata_link *link, int classify,
 		dev->pio_mode = XFER_PIO_0;
 		dev->flags &= ~ATA_DFLAG_SLEEPING;
 
-		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
-			continue;
-
-		/* apply class override */
-		if (lflags & ATA_LFLAG_ASSUME_ATA)
-			classes[dev->devno] = ATA_DEV_ATA;
-		else if (lflags & ATA_LFLAG_ASSUME_SEMB)
-			classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */
+		if (!ata_phys_link_offline(ata_dev_phys_link(dev))) {
+			/* apply class override */
+			if (lflags & ATA_LFLAG_ASSUME_ATA)
+				classes[dev->devno] = ATA_DEV_ATA;
+			else if (lflags & ATA_LFLAG_ASSUME_SEMB)
+				classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
+		} else
+			classes[dev->devno] = ATA_DEV_NONE;
 	}
 
 	/* record current link speed */
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2a4c516894f..4b473948632 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2153,8 +2153,17 @@ void ata_sff_error_handler(struct ata_port *ap)
  */
 void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
 {
-	if (qc->ap->ioaddr.bmdma_addr)
+	struct ata_port *ap = qc->ap;
+	unsigned long flags;
+
+	spin_lock_irqsave(ap->lock, flags);
+
+	ap->hsm_task_state = HSM_ST_IDLE;
+
+	if (ap->ioaddr.bmdma_addr)
 		ata_bmdma_stop(qc);
+
+	spin_unlock_irqrestore(ap->lock, flags);
 }
 
 /**
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 1cfa74535d9..5b72e734300 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -70,6 +70,7 @@ enum {
 static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
 static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
+static void svia_tf_load(struct ata_port *ap, const struct ata_taskfile *tf);
 static void svia_noop_freeze(struct ata_port *ap);
 static int vt6420_prereset(struct ata_link *link, unsigned long deadline);
 static int vt6421_pata_cable_detect(struct ata_port *ap);
@@ -103,21 +104,26 @@ static struct scsi_host_template svia_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
 
-static struct ata_port_operations vt6420_sata_ops = {
+static struct ata_port_operations svia_base_ops = {
 	.inherits		= &ata_bmdma_port_ops,
+	.sff_tf_load		= svia_tf_load,
+};
+
+static struct ata_port_operations vt6420_sata_ops = {
+	.inherits		= &svia_base_ops,
 	.freeze			= svia_noop_freeze,
 	.prereset		= vt6420_prereset,
 };
 
 static struct ata_port_operations vt6421_pata_ops = {
-	.inherits		= &ata_bmdma_port_ops,
+	.inherits		= &svia_base_ops,
 	.cable_detect		= vt6421_pata_cable_detect,
 	.set_piomode		= vt6421_set_pio_mode,
 	.set_dmamode		= vt6421_set_dma_mode,
 };
 
 static struct ata_port_operations vt6421_sata_ops = {
-	.inherits		= &ata_bmdma_port_ops,
+	.inherits		= &svia_base_ops,
 	.scr_read		= svia_scr_read,
 	.scr_write		= svia_scr_write,
 };
@@ -168,6 +174,29 @@ static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 	return 0;
 }
 
+/**
+ *	svia_tf_load - send taskfile registers to host controller
+ *	@ap: Port to which output is sent
+ *	@tf: ATA taskfile register set
+ *
+ *	Outputs ATA taskfile to standard ATA host controller.
+ *
+ *	This works around an internal bug of VIA chipsets, which
+ *	reset the device register after the IEN bit in the ctl
+ *	register is changed.
+ */
+static void svia_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
+{
+	struct ata_taskfile ttf;	/* writable copy; the caller's @tf is const */
+
+	if (tf->ctl != ap->last_ctl)  {	/* ctl differs from ap->last_ctl */
+		ttf = *tf;
+		ttf.flags |= ATA_TFLAG_DEVICE;	/* presumably forces a device register write */
+		tf = &ttf;
+	}
+	ata_sff_tf_load(ap, tf);
+}
+
 static void svia_noop_freeze(struct ata_port *ap)
 {
 	/* Some VIA controllers choke if ATA_NIEN is manipulated in
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index af0d175c025..5260e9e0df4 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -21,6 +21,8 @@
 #include <linux/memory_hotplug.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/stat.h>
+
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
@@ -325,7 +327,7 @@ memory_probe_store(struct class *class, const char *buf, size_t count)
 
 	return count;
 }
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 
 static int memory_probe_init(void)
 {
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c632..f5207090885 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -13,6 +13,7 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/swap.h>
 
 static struct sysdev_class node_class = {
 	.name = "node",
@@ -61,34 +62,52 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 	si_meminfo_node(&i, nid);
 
 	n = sprintf(buf, "\n"
-		       "Node %d MemTotal:     %8lu kB\n"
-		       "Node %d MemFree:      %8lu kB\n"
-		       "Node %d MemUsed:      %8lu kB\n"
-		       "Node %d Active:       %8lu kB\n"
-		       "Node %d Inactive:     %8lu kB\n"
+		       "Node %d MemTotal:       %8lu kB\n"
+		       "Node %d MemFree:        %8lu kB\n"
+		       "Node %d MemUsed:        %8lu kB\n"
+		       "Node %d Active:         %8lu kB\n"
+		       "Node %d Inactive:       %8lu kB\n"
+		       "Node %d Active(anon):   %8lu kB\n"
+		       "Node %d Inactive(anon): %8lu kB\n"
+		       "Node %d Active(file):   %8lu kB\n"
+		       "Node %d Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+		       "Node %d Unevictable:    %8lu kB\n"
+		       "Node %d Mlocked:        %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
-		       "Node %d HighTotal:    %8lu kB\n"
-		       "Node %d HighFree:     %8lu kB\n"
-		       "Node %d LowTotal:     %8lu kB\n"
-		       "Node %d LowFree:      %8lu kB\n"
+		       "Node %d HighTotal:      %8lu kB\n"
+		       "Node %d HighFree:       %8lu kB\n"
+		       "Node %d LowTotal:       %8lu kB\n"
+		       "Node %d LowFree:        %8lu kB\n"
 #endif
-		       "Node %d Dirty:        %8lu kB\n"
-		       "Node %d Writeback:    %8lu kB\n"
-		       "Node %d FilePages:    %8lu kB\n"
-		       "Node %d Mapped:       %8lu kB\n"
-		       "Node %d AnonPages:    %8lu kB\n"
-		       "Node %d PageTables:   %8lu kB\n"
-		       "Node %d NFS_Unstable: %8lu kB\n"
-		       "Node %d Bounce:       %8lu kB\n"
-		       "Node %d WritebackTmp: %8lu kB\n"
-		       "Node %d Slab:         %8lu kB\n"
-		       "Node %d SReclaimable: %8lu kB\n"
-		       "Node %d SUnreclaim:   %8lu kB\n",
+		       "Node %d Dirty:          %8lu kB\n"
+		       "Node %d Writeback:      %8lu kB\n"
+		       "Node %d FilePages:      %8lu kB\n"
+		       "Node %d Mapped:         %8lu kB\n"
+		       "Node %d AnonPages:      %8lu kB\n"
+		       "Node %d PageTables:     %8lu kB\n"
+		       "Node %d NFS_Unstable:   %8lu kB\n"
+		       "Node %d Bounce:         %8lu kB\n"
+		       "Node %d WritebackTmp:   %8lu kB\n"
+		       "Node %d Slab:           %8lu kB\n"
+		       "Node %d SReclaimable:   %8lu kB\n"
+		       "Node %d SUnreclaim:     %8lu kB\n",
 		       nid, K(i.totalram),
 		       nid, K(i.freeram),
 		       nid, K(i.totalram - i.freeram),
-		       nid, K(node_page_state(nid, NR_ACTIVE)),
-		       nid, K(node_page_state(nid, NR_INACTIVE)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
+				node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
+				node_page_state(nid, NR_INACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
+		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+		       nid, K(node_page_state(nid, NR_MLOCK)),
+#endif
 #ifdef CONFIG_HIGHMEM
 		       nid, K(i.totalhigh),
 		       nid, K(i.freehigh),
@@ -173,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
 		sysdev_create_file(&node->sysdev, &attr_meminfo);
 		sysdev_create_file(&node->sysdev, &attr_numastat);
 		sysdev_create_file(&node->sysdev, &attr_distance);
+
+		scan_unevictable_register_node(node);
 	}
 	return error;
 }
@@ -192,6 +213,8 @@ void unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_numastat);
 	sysdev_remove_file(&node->sysdev, &attr_distance);
 
+	scan_unevictable_unregister_node(node);
+
 	sysdev_unregister(&node->sysdev);
 }
 
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b82654e883a..d876ad86123 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -90,7 +90,7 @@ static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
 static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
 static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
 static struct device_attribute dev_attr_firmware_version = {
-	.attr = { .name = "firmware-version", .mode = S_IRUGO, .owner = THIS_MODULE },
+	.attr = { .name = "firmware-version", .mode = S_IRUGO },
 	.show = aoedisk_show_fwver,
 };
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b3351260d5..9034ca585af 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -391,7 +391,7 @@ static ssize_t pid_show(struct device *dev,
 }
 
 static struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE },
+	.attr = { .name = "pid", .mode = S_IRUGO},
 	.show = pid_show,
 };
 
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 31dcd9142d5..dc8d1a90971 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -417,6 +417,6 @@ static void __exit agp_ali_cleanup(void)
 module_init(agp_ali_init);
 module_exit(agp_ali_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 2812ee2b165..52f4361eb6e 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -772,6 +772,6 @@ module_init(agp_amd64_init);
 module_exit(agp_amd64_cleanup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
 module_param(agp_try_unsupported, bool, 0);
 MODULE_LICENSE("GPL");
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index ae2791b926b..f1537eece07 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -561,6 +561,6 @@ static void __exit agp_ati_cleanup(void)
 module_init(agp_ati_init);
 module_exit(agp_ati_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 3a3cc03d401..8c617ad7497 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -349,7 +349,7 @@ static __init int agp_setup(char *s)
 __setup("agp=", agp_setup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION("AGP GART driver");
 MODULE_LICENSE("GPL and additional rights");
 MODULE_ALIAS_MISCDEV(AGPGART_MINOR);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1108665913e..9cf6e9bb017 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2390,5 +2390,5 @@ static void __exit agp_intel_cleanup(void)
 module_init(agp_intel_init);
 module_exit(agp_intel_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 5bbed3d79db..16acee2de11 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -1,7 +1,7 @@
 /*
  * Nvidia AGPGART routines.
  * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
- * to work in 2.5 by Dave Jones <davej@codemonkey.org.uk>
+ * to work in 2.5 by Dave Jones <davej@redhat.com>
  */
 
 #include <linux/module.h>
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index f2492ecf082..db60539bf67 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -20,8 +20,8 @@
 #include <linux/agp_backend.h>
 #include <linux/log2.h>
 
-#include <asm-parisc/parisc-device.h>
-#include <asm-parisc/ropes.h>
+#include <asm/parisc-device.h>
+#include <asm/ropes.h>
 
 #include "agp.h"
 
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 9f4d49e1b59..d3bd243867f 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -595,4 +595,4 @@ module_init(agp_via_init);
 module_exit(agp_via_cleanup);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index 5329d482b58..0a826d7be10 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -210,8 +210,8 @@ static int ds1286_ioctl(struct inode *inode, struct file *file,
 		if (sec != 0)
 			return -EINVAL;
 
-		min = BIN2BCD(min);
-		min = BIN2BCD(hrs);
+		min = bin2bcd(min);
+		hrs = bin2bcd(hrs);	/* written to RTC_HOURS_ALARM below */
 
 		spin_lock(&ds1286_lock);
 		rtc_write(hrs, RTC_HOURS_ALARM);
@@ -353,7 +353,7 @@ static int ds1286_proc_output(char *buf)
 
 	ds1286_get_time(&tm);
 	hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
-	BCD_TO_BIN(hundredth);
+	hundredth = bcd2bin(hundredth);
 
 	p += sprintf(p,
 	             "rtc_time\t: %02d:%02d:%02d.%02d\n"
@@ -477,12 +477,12 @@ static void ds1286_get_time(struct rtc_time *rtc_tm)
 	rtc_write(save_control, RTC_CMD);
 	spin_unlock_irqrestore(&ds1286_lock, flags);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -531,12 +531,12 @@ static int ds1286_set_time(struct rtc_time *rtc_tm)
 	if (yrs >= 100)
 		yrs -= 100;
 
-	BIN_TO_BCD(sec);
-	BIN_TO_BCD(min);
-	BIN_TO_BCD(hrs);
-	BIN_TO_BCD(day);
-	BIN_TO_BCD(mon);
-	BIN_TO_BCD(yrs);
+	sec = bin2bcd(sec);
+	min = bin2bcd(min);
+	hrs = bin2bcd(hrs);
+	day = bin2bcd(day);
+	mon = bin2bcd(mon);
+	yrs = bin2bcd(yrs);
 
 	spin_lock_irqsave(&ds1286_lock, flags);
 	save_control = rtc_read(RTC_CMD);
@@ -572,8 +572,8 @@ static void ds1286_get_alm_time(struct rtc_time *alm_tm)
 	cmd = rtc_read(RTC_CMD);
 	spin_unlock_irqrestore(&ds1286_lock, flags);
 
-	BCD_TO_BIN(alm_tm->tm_min);
-	BCD_TO_BIN(alm_tm->tm_hour);
+	alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+	alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	alm_tm->tm_sec = 0;
 }
 
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index c5e67a62395..170693c93c7 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -131,12 +131,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
 	local_irq_restore(flags);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -211,12 +211,12 @@ static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			else
 				yrs -= 1900;	/* RTC (70, 71, ... 99) */
 
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 
 			lock_kernel();
 			local_irq_save(flags);
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 4998b2761e8..cf2461d34e5 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2477,7 +2477,11 @@ static int pc_send_break(struct tty_struct *tty, int msec)
 	unsigned long flags;
 
 	if (msec == -1)
-		return -EOPNOTSUPP;
+		msec = 0xFFFF;
+	else if (msec > 0xFFFE)
+		msec = 0xFFFE;
+	else if (msec < 1)
+		msec = 1;
 
 	spin_lock_irqsave(&epca_lock, flags);
 	globalwinon(ch);
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index f3cfb4c7612..408f5f92cb4 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -219,7 +219,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 	for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
 		irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
 
-		if (irq >= NR_IRQS) {
+		if (irq >= nr_irqs) {
 			irq = HPET_MAX_IRQ;
 			break;
 		}
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index ec9d0443d92..2abd881b4cb 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -130,12 +130,12 @@ static long rtc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (yrs >= 100)
 			yrs -= 100;
 
-		sec = BIN2BCD(sec);
-		min = BIN2BCD(min);
-		hrs = BIN2BCD(hrs);
-		day = BIN2BCD(day);
-		mon = BIN2BCD(mon);
-		yrs = BIN2BCD(yrs);
+		sec = bin2bcd(sec);
+		min = bin2bcd(min);
+		hrs = bin2bcd(hrs);
+		day = bin2bcd(day);
+		mon = bin2bcd(mon);
+		yrs = bin2bcd(yrs);
 
 		spin_lock_irq(&rtc_lock);
 		rtc->control |= M48T35_RTC_SET;
@@ -311,12 +311,12 @@ static void get_rtc_time(struct rtc_time *rtc_tm)
 	rtc->control &= ~M48T35_RTC_READ;
 	spin_unlock_irq(&rtc_lock);
 
-	rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-	rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-	rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-	rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-	rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-	rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	/*
 	 * Account for differences between how the RTC uses the values
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index b930de50407..3f7da8cf3a8 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -41,7 +41,8 @@ static u8 pc8736x_gpio_shadow[4];
 #define SIO_BASE2       0x4E	/* alt command-reg to check */
 
 #define SIO_SID		0x20	/* SuperI/O ID Register */
-#define SIO_SID_VALUE	0xe9	/* Expected value in SuperI/O ID Register */
+#define SIO_SID_PC87365	0xe5	/* Expected value in ID Register for PC87365 */
+#define SIO_SID_PC87366	0xe9	/* Expected value in ID Register for PC87366 */
 
 #define SIO_CF1		0x21	/* chip config, bit0 is chip enable */
 
@@ -91,13 +92,17 @@ static inline int superio_inb(int addr)
 
 static int pc8736x_superio_present(void)
 {
+	int id;
+
 	/* try the 2 possible values, read a hardware reg to verify */
 	superio_cmd = SIO_BASE1;
-	if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+	id = superio_inb(SIO_SID);
+	if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
 		return superio_cmd;
 
 	superio_cmd = SIO_BASE2;
-	if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+	id = superio_inb(SIO_SID);
+	if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
 		return superio_cmd;
 
 	return 0;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c8752eaad48..705a839f179 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,9 +558,26 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state input_timer_state;
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
 
+/* Return the irq_timer_state[] entry for @irq, or NULL if @irq >= nr_irqs. */
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return irq_timer_state[irq];
+	return NULL;
+}
+
+/* Store @state in the irq_timer_state[] slot for @irq. */
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	/* irqs at or above nr_irqs are silently ignored */
+	if (irq < nr_irqs)
+		irq_timer_state[irq] = state;
+}
+
+static struct timer_rand_state input_timer_state;
+
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -648,11 +665,15 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-	if (irq >= NR_IRQS || irq_timer_state[irq] == NULL)
+	struct timer_rand_state *state;
+
+	state = get_timer_rand_state(irq);
+
+	if (state == NULL)
 		return;
 
 	DEBUG_ENT("irq event %d\n", irq);
-	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
+	add_timer_randomness(state, 0x100 + irq);
 }
 
 #ifdef CONFIG_BLOCK
@@ -912,7 +933,12 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-	if (irq >= NR_IRQS || irq_timer_state[irq])
+	if (irq >= nr_irqs)
+		return;
+
+	state = get_timer_rand_state(irq);
+
+	if (state)
 		return;
 
 	/*
@@ -921,7 +947,7 @@ void rand_initialize_irq(int irq)
 	 */
 	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
 	if (state)
-		irq_timer_state[irq] = state;
+		set_timer_rand_state(irq, state);
 }
 
 #ifdef CONFIG_BLOCK
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 17683de9571..32dc89720d5 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -518,17 +518,17 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
 							RTC_ALWAYS_BCD) {
 			if (sec < 60)
-				BIN_TO_BCD(sec);
+				sec = bin2bcd(sec);
 			else
 				sec = 0xff;
 
 			if (min < 60)
-				BIN_TO_BCD(min);
+				min = bin2bcd(min);
 			else
 				min = 0xff;
 
 			if (hrs < 24)
-				BIN_TO_BCD(hrs);
+				hrs = bin2bcd(hrs);
 			else
 				hrs = 0xff;
 		}
@@ -614,12 +614,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
 		    || RTC_ALWAYS_BCD) {
-			BIN_TO_BCD(sec);
-			BIN_TO_BCD(min);
-			BIN_TO_BCD(hrs);
-			BIN_TO_BCD(day);
-			BIN_TO_BCD(mon);
-			BIN_TO_BCD(yrs);
+			sec = bin2bcd(sec);
+			min = bin2bcd(min);
+			hrs = bin2bcd(hrs);
+			day = bin2bcd(day);
+			mon = bin2bcd(mon);
+			yrs = bin2bcd(yrs);
 		}
 
 		save_control = CMOS_READ(RTC_CONTROL);
@@ -1099,7 +1099,7 @@ no_irq:
 	spin_unlock_irq(&rtc_lock);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		BCD_TO_BIN(year);       /* This should never happen... */
+		year = bcd2bin(year);       /* This should never happen... */
 
 	if (year < 20) {
 		epoch = 2000;
@@ -1352,13 +1352,13 @@ static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(rtc_tm->tm_sec);
-		BCD_TO_BIN(rtc_tm->tm_min);
-		BCD_TO_BIN(rtc_tm->tm_hour);
-		BCD_TO_BIN(rtc_tm->tm_mday);
-		BCD_TO_BIN(rtc_tm->tm_mon);
-		BCD_TO_BIN(rtc_tm->tm_year);
-		BCD_TO_BIN(rtc_tm->tm_wday);
+		rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+		rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+		rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+		rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+		rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+		rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+		rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
 	}
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -1392,9 +1392,9 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
 	spin_unlock_irq(&rtc_lock);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-		BCD_TO_BIN(alm_tm->tm_sec);
-		BCD_TO_BIN(alm_tm->tm_min);
-		BCD_TO_BIN(alm_tm->tm_hour);
+		alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
+		alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+		alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	}
 }
 
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 5b8d7a1aa3e..ba4e86281fb 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -2504,7 +2504,7 @@ static void __devexit sx_remove_card(struct sx_board *board,
 		del_timer(&board->timer);
 		if (pdev) {
 #ifdef CONFIG_PCI
-			pci_iounmap(pdev, board->base2);
+			iounmap(board->base2);
 			pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2);
 #endif
 		} else {
@@ -2677,7 +2677,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
 	}
 	board->hw_base = pci_resource_start(pdev, reg);
 	board->base2 =
-	board->base = pci_iomap(pdev, reg, WINDOW_LEN(board));
+	board->base = ioremap_nocache(board->hw_base, WINDOW_LEN(board));
 	if (!board->base) {
 		dev_err(&pdev->dev, "ioremap failed\n");
 		goto err_reg;
@@ -2703,7 +2703,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
 
 	return 0;
 err_unmap:
-	pci_iounmap(pdev, board->base2);
+	iounmap(board->base2);
 err_reg:
 	pci_release_region(pdev, reg);
 err_flag:
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e695..ce0d9da52a8 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show Pending Timers",
+	.action_msg	= "Show clockevent devices & pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index e70d13defde..9c47dc48c9f 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1157,7 +1157,7 @@ EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
  * Once all references to platform device are down to 0,
  * release all allocated structures.
  */
-static void tpm_dev_release(struct device *dev)
+void tpm_dev_release(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index ffe9b4e3072..54c837288d1 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -641,7 +641,7 @@ static int __devinit giu_probe(struct platform_device *dev)
 	}
 
 	irq = platform_get_irq(dev, 0);
-	if (irq < 0 || irq >= NR_IRQS)
+	if (irq < 0 || irq >= nr_irqs)
 		return -EBUSY;
 
 	return cascade_irq(irq, giu_get_irq);
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 71d2ac4e3f4..c20171078d1 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -237,9 +237,12 @@ static int __init parse_pmtmr(char *arg)
 
 	if (strict_strtoul(arg, 16, &base))
 		return -EINVAL;
-
+#ifdef CONFIG_X86_64
+	if (base > UINT_MAX)
+		return -ERANGE;
+#endif
 	printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
-	       (unsigned int)pmtmr_ioport, base);
+	       pmtmr_ioport, base);
 	pmtmr_ioport = base;
 
 	return 1;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index cd303901eb5..904e57558bb 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -48,13 +48,13 @@ config DW_DMAC
 	  can be integrated in chips such as the Atmel AT32ap7000.
 
 config FSL_DMA
-	bool "Freescale MPC85xx/MPC83xx DMA support"
-	depends on PPC
+	tristate "Freescale Elo and Elo Plus DMA support"
+	depends on FSL_SOC
 	select DMA_ENGINE
 	---help---
-	  Enable support for the Freescale DMA engine. Now, it support
-	  MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
-	  The MPC8349, MPC8360 is also supported.
+	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
+	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
+	  Elo Plus is the DMA controller on 85xx and 86xx parts.
 
 config MV_XOR
 	bool "Marvell XOR engine support"
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index a08d1970474..d1e381e35a9 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -325,7 +325,12 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
 	struct dmatest_thread	*thread;
 	unsigned int		i;
 
-	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
+	/* Have we already been told about this channel? */
+	list_for_each_entry(dtc, &dmatest_channels, node)
+		if (dtc->chan == chan)
+			return DMA_DUP;
+
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
 	if (!dtc) {
 		pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
 		return DMA_NAK;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index c0059ca5834..0b95dcce447 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -370,7 +370,10 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
 					struct dma_client *client)
 {
 	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
-	LIST_HEAD(tmp_list);
+
+	/* Has this channel already been allocated? */
+	if (fsl_chan->desc_pool)
+		return 1;
 
 	/* We need the descriptor to be aligned to 32bytes
 	 * for meeting FSL DMA specification requirement.
@@ -410,6 +413,8 @@ static void fsl_dma_free_chan_resources(struct dma_chan *chan)
 	}
 	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 	dma_pool_destroy(fsl_chan->desc_pool);
+
+	fsl_chan->desc_pool = NULL;
 }
 
 static struct dma_async_tx_descriptor *
@@ -786,159 +791,29 @@ static void dma_do_tasklet(unsigned long data)
 	fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static void fsl_dma_callback_test(void *param)
-{
-	struct fsl_dma_chan *fsl_chan = param;
-	if (fsl_chan)
-		dev_dbg(fsl_chan->dev, "selftest: callback is ok!\n");
-}
-
-static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
-{
-	struct dma_chan *chan;
-	int err = 0;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	u8 *src, *dest;
-	int i;
-	size_t test_size;
-	struct dma_async_tx_descriptor *tx1, *tx2, *tx3;
-
-	test_size = 4096;
-
-	src = kmalloc(test_size * 2, GFP_KERNEL);
-	if (!src) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc memory for test!\n");
-		return -ENOMEM;
-	}
-
-	dest = src + test_size;
-
-	for (i = 0; i < test_size; i++)
-		src[i] = (u8) i;
-
-	chan = &fsl_chan->common;
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-	/* TX 1 */
-	dma_src = dma_map_single(fsl_chan->dev, src, test_size / 2,
-				 DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest, test_size / 2,
-				  DMA_FROM_DEVICE);
-	tx1 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 2, 0);
-	async_tx_ack(tx1);
-
-	cookie = fsl_dma_tx_submit(tx1);
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Test free and re-alloc channel resources */
-	fsl_dma_free_chan_resources(chan);
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Continue to test
-	 * TX 2
-	 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size / 2,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size / 2,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx2 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx2);
-
-	/* TX 3 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size * 3 / 4,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size * 3 / 4,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx3 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx3);
-
-	/* Interrupt tx test */
-	tx1 = fsl_dma_prep_interrupt(chan, 0);
-	async_tx_ack(tx1);
-	cookie = fsl_dma_tx_submit(tx1);
-
-	/* Test exchanging the prepared tx sort */
-	cookie = fsl_dma_tx_submit(tx3);
-	cookie = fsl_dma_tx_submit(tx2);
-
-	if (dma_has_cap(DMA_INTERRUPT, ((struct fsl_dma_device *)
-	    dev_get_drvdata(fsl_chan->dev->parent))->common.cap_mask)) {
-		tx3->callback = fsl_dma_callback_test;
-		tx3->callback_param = fsl_chan;
-	}
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	err = memcmp(src, dest, test_size);
-	if (err) {
-		for (i = 0; (*(src + i) == *(dest + i)) && (i < test_size);
-				i++);
-		dev_err(fsl_chan->dev, "selftest: Test failed, data %d/%ld is "
-				"error! src 0x%x, dest 0x%x\n",
-				i, (long)test_size, *(src + i), *(dest + i));
-	}
-
-free_resources:
-	fsl_dma_free_chan_resources(chan);
-out:
-	kfree(src);
-	return err;
-}
-
-static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
-			const struct of_device_id *match)
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
 {
-	struct fsl_dma_device *fdev;
 	struct fsl_dma_chan *new_fsl_chan;
 	int err;
 
-	fdev = dev_get_drvdata(dev->dev.parent);
-	BUG_ON(!fdev);
-
 	/* alloc channel */
 	new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
 	if (!new_fsl_chan) {
-		dev_err(&dev->dev, "No free memory for allocating "
+		dev_err(fdev->dev, "No free memory for allocating "
 				"dma channels!\n");
 		return -ENOMEM;
 	}
 
 	/* get dma channel register base */
-	err = of_address_to_resource(dev->node, 0, &new_fsl_chan->reg);
+	err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
 	if (err) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				dev->node->full_name);
+		dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+				node->full_name);
 		goto err_no_reg;
 	}
 
-	new_fsl_chan->feature = *(u32 *)match->data;
+	new_fsl_chan->feature = feature;
 
 	if (!fdev->feature)
 		fdev->feature = new_fsl_chan->feature;
@@ -948,13 +823,13 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 	 */
 	WARN_ON(fdev->feature != new_fsl_chan->feature);
 
-	new_fsl_chan->dev = &dev->dev;
+	new_fsl_chan->dev = &new_fsl_chan->common.dev;
 	new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
 			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
 	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
 	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
-		dev_err(&dev->dev, "There is no %d channel!\n",
+		dev_err(fdev->dev, "There is no %d channel!\n",
 				new_fsl_chan->id);
 		err = -EINVAL;
 		goto err_no_chan;
@@ -988,29 +863,23 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 			&fdev->common.channels);
 	fdev->common.chancnt++;
 
-	new_fsl_chan->irq = irq_of_parse_and_map(dev->node, 0);
+	new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
 	if (new_fsl_chan->irq != NO_IRQ) {
 		err = request_irq(new_fsl_chan->irq,
 					&fsl_dma_chan_do_interrupt, IRQF_SHARED,
 					"fsldma-channel", new_fsl_chan);
 		if (err) {
-			dev_err(&dev->dev, "DMA channel %s request_irq error "
-				"with return %d\n", dev->node->full_name, err);
+			dev_err(fdev->dev, "DMA channel %s request_irq error "
+				"with return %d\n", node->full_name, err);
 			goto err_no_irq;
 		}
 	}
 
-	err = fsl_dma_self_test(new_fsl_chan);
-	if (err)
-		goto err_self_test;
-
-	dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
-				match->compatible, new_fsl_chan->irq);
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+				compatible, new_fsl_chan->irq);
 
 	return 0;
 
-err_self_test:
-	free_irq(new_fsl_chan->irq, new_fsl_chan);
 err_no_irq:
 	list_del(&new_fsl_chan->common.device_node);
 err_no_chan:
@@ -1020,38 +889,20 @@ err_no_reg:
 	return err;
 }
 
-const u32 mpc8540_dma_ip_feature = FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN;
-const u32 mpc8349_dma_ip_feature = FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN;
-
-static struct of_device_id of_fsl_dma_chan_ids[] = {
-	{
-		.compatible = "fsl,eloplus-dma-channel",
-		.data = (void *)&mpc8540_dma_ip_feature,
-	},
-	{
-		.compatible = "fsl,elo-dma-channel",
-		.data = (void *)&mpc8349_dma_ip_feature,
-	},
-	{}
-};
-
-static struct of_platform_driver of_fsl_dma_chan_driver = {
-	.name = "of-fsl-dma-channel",
-	.match_table = of_fsl_dma_chan_ids,
-	.probe = of_fsl_dma_chan_probe,
-};
-
-static __init int of_fsl_dma_chan_init(void)
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
 {
-	return of_register_platform_driver(&of_fsl_dma_chan_driver);
+	free_irq(fchan->irq, fchan);
+	list_del(&fchan->common.device_node);
+	iounmap(fchan->reg_base);
+	kfree(fchan);
 }
 
 static int __devinit of_fsl_dma_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
 	int err;
-	unsigned int irq;
 	struct fsl_dma_device *fdev;
+	struct device_node *child;
 
 	fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
 	if (!fdev) {
@@ -1085,9 +936,9 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.dev = &dev->dev;
 
-	irq = irq_of_parse_and_map(dev->node, 0);
-	if (irq != NO_IRQ) {
-		err = request_irq(irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+	fdev->irq = irq_of_parse_and_map(dev->node, 0);
+	if (fdev->irq != NO_IRQ) {
+		err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
 					"fsldma-device", fdev);
 		if (err) {
 			dev_err(&dev->dev, "DMA device request_irq error "
@@ -1097,7 +948,21 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	}
 
 	dev_set_drvdata(&(dev->dev), fdev);
-	of_platform_bus_probe(dev->node, of_fsl_dma_chan_ids, &dev->dev);
+
+	/* We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(dev->node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+	}
 
 	dma_async_device_register(&fdev->common);
 	return 0;
@@ -1109,6 +974,30 @@ err_no_reg:
 	return err;
 }
 
+/* Unbind: unregister from dmaengine, then release channels, IRQ and MMIO. */
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+	struct device *parent = &of_dev->dev;
+	struct fsl_dma_device *fdev = dev_get_drvdata(parent);
+	unsigned int slot;
+
+	dma_async_device_unregister(&fdev->common);
+
+	/* skip empty chan[] slots; each populated one is torn down and freed */
+	for (slot = 0; slot < FSL_DMA_MAX_CHANS_PER_DEVICE; slot++) {
+		if (!fdev->chan[slot])
+			continue;
+		fsl_dma_chan_remove(fdev->chan[slot]);
+	}
+
+	if (fdev->irq != NO_IRQ)
+		free_irq(fdev->irq, fdev);
+	iounmap(fdev->reg_base);
+	kfree(fdev);
+	dev_set_drvdata(parent, NULL);
+	return 0;
+}
+
 static struct of_device_id of_fsl_dma_ids[] = {
 	{ .compatible = "fsl,eloplus-dma", },
 	{ .compatible = "fsl,elo-dma", },
@@ -1116,15 +1005,32 @@ static struct of_device_id of_fsl_dma_ids[] = {
 };
 
 static struct of_platform_driver of_fsl_dma_driver = {
-	.name = "of-fsl-dma",
+	.name = "fsl-elo-dma",
 	.match_table = of_fsl_dma_ids,
 	.probe = of_fsl_dma_probe,
+	.remove = of_fsl_dma_remove,
 };
 
 static __init int of_fsl_dma_init(void)
 {
-	return of_register_platform_driver(&of_fsl_dma_driver);
+	int ret;
+
+	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+	ret = of_register_platform_driver(&of_fsl_dma_driver);
+	if (ret)
+		pr_err("fsldma: failed to register platform driver\n");
+
+	return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+	of_unregister_platform_driver(&of_fsl_dma_driver);
 }
 
-subsys_initcall(of_fsl_dma_chan_init);
 subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 6faf07ba0d0..4f21a512d84 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -114,6 +114,7 @@ struct fsl_dma_device {
 	struct dma_device common;
 	struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
 	u32 feature;		/* The same as DMA channels */
+	int irq;		/* Controller (device-wide) IRQ */
 };
 
 /* Define macros for fsl_dma_chan->feature property */
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index bc8c6e3470c..1ef68b31565 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -971,11 +971,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
 	switch (ioat_chan->device->version) {
 	case IOAT_VER_1_2:
 		return ioat1_dma_get_next_descriptor(ioat_chan);
-		break;
 	case IOAT_VER_2_0:
 	case IOAT_VER_3_0:
 		return ioat2_dma_get_next_descriptor(ioat_chan);
-		break;
 	}
 	return NULL;
 }
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 0e024fe2d8c..887072f5dc8 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -142,7 +142,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 		csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
 		csrow->mtype = MEM_XDR;
-		csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
+		csrow->edac_mode = EDAC_SECDED;
 		dev_dbg(mci->dev,
 			"Initialized on node %d, chanmask=0x%x,"
 			" first_page=0x%lx, nr_pages=0x%x\n",
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index deb154aa47c..4353414a0b7 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -732,7 +732,6 @@ static int __init ibft_create_attribute(struct ibft_kobject *kobj_data,
 
 	attr->attr.name = name;
 	attr->attr.mode = S_IRUSR;
-	attr->attr.owner = THIS_MODULE;
 
 	attr->hdr = hdr;
 	attr->show = show;
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index dbd42d6c93a..7f2ee27fe76 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -127,6 +127,13 @@ config GPIO_PCF857X
 	  This driver provides an in-kernel interface to those GPIOs using
 	  platform-neutral GPIO calls.
 
+config GPIO_TWL4030
+	tristate "TWL4030, TWL5030, and TPS659x0 GPIOs"
+	depends on TWL4030_CORE
+	help
+	  Say yes here to access the GPIO signals of various multi-function
+	  power management chips from Texas Instruments.
+
 comment "PCI GPIO expanders:"
 
 config GPIO_BT8XX
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 01b4bbde195..6aafdeb9ad0 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
+obj-$(CONFIG_GPIO_TWL4030)	+= twl4030-gpio.o
 obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9112830107a..faa1cc66e9c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -248,7 +248,7 @@ static ssize_t gpio_value_show(struct device *dev,
 	if (!test_bit(FLAG_EXPORT, &desc->flags))
 		status = -EIO;
 	else
-		status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+		status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
 
 	mutex_unlock(&sysfs_lock);
 	return status;
@@ -1105,7 +1105,7 @@ int gpio_get_value_cansleep(unsigned gpio)
 
 	might_sleep_if(extra_checks);
 	chip = gpio_to_chip(gpio);
-	return chip->get(chip, gpio - chip->base);
+	return chip->get ? chip->get(chip, gpio - chip->base) : 0;
 }
 EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
 
@@ -1143,7 +1143,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 
 		if (!is_out) {
 			int		irq = gpio_to_irq(gpio);
-			struct irq_desc	*desc = irq_desc + irq;
+			struct irq_desc	*desc = irq_to_desc(irq);
 
 			/* This races with request_irq(), set_irq_type(),
 			 * and set_irq_wake() ... but those are "rare".
diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
new file mode 100644
index 00000000000..37d3eec8730
--- /dev/null
+++ b/drivers/gpio/twl4030-gpio.c
@@ -0,0 +1,521 @@
+/*
+ * twl4030-gpio.c -- access to GPIOs on TWL4030/TPS659x0 chips
+ *
+ * Copyright (C) 2006-2007 Texas Instruments, Inc.
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Code re-arranged and cleaned up by:
+ *	Syed Mohammed Khasim <x0khasim@ti.com>
+ *
+ * Initial Code:
+ *	Andy Lowe / Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/irq.h>
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * The GPIO "subchip" supports 18 GPIOs which can be configured as
+ * inputs or outputs, with pullups or pulldowns on each pin.  Each
+ * GPIO can trigger interrupts on either or both edges.
+ *
+ * GPIO interrupts can be fed to either of two IRQ lines; this is
+ * intended to support multiple hosts.
+ *
+ * There are also two LED pins used sometimes as output-only GPIOs.
+ */
+
+
+static struct gpio_chip twl_gpiochip;
+static int twl4030_gpio_irq_base;
+
+/* genirq interfaces are not available to modules */
+#ifdef MODULE
+#define is_module()	true
+#else
+#define is_module()	false
+#endif
+
+/* GPIO_CTRL Fields */
+#define MASK_GPIO_CTRL_GPIO0CD1		BIT(0)
+#define MASK_GPIO_CTRL_GPIO1CD2		BIT(1)
+#define MASK_GPIO_CTRL_GPIO_ON		BIT(2)
+
+/* Mask for GPIO registers when aggregated into a 32-bit integer */
+#define GPIO_32_MASK			0x0003ffff
+
+/* Data structures */
+static DEFINE_MUTEX(gpio_lock);
+
+/* store usage of each GPIO. - each bit represents one GPIO */
+static unsigned int gpio_usage_count;
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Write one byte to a TWL4030 GPIO module register; returns the I2C status
+ */
+static inline int gpio_twl4030_write(u8 address, u8 data)
+{
+	return twl4030_i2c_write_u8(TWL4030_MODULE_GPIO, data, address);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * LED register offsets (use TWL4030_MODULE_{LED,PWMA,PWMB})
+ * PWMs A and B are dedicated to LEDs A and B, respectively.
+ */
+
+#define TWL4030_LED_LEDEN	0x0
+
+/* LEDEN bits */
+#define LEDEN_LEDAON		BIT(0)
+#define LEDEN_LEDBON		BIT(1)
+#define LEDEN_LEDAEXT		BIT(2)
+#define LEDEN_LEDBEXT		BIT(3)
+#define LEDEN_LEDAPWM		BIT(4)
+#define LEDEN_LEDBPWM		BIT(5)
+#define LEDEN_PWM_LENGTHA	BIT(6)
+#define LEDEN_PWM_LENGTHB	BIT(7)
+
+#define TWL4030_PWMx_PWMxON	0x0
+#define TWL4030_PWMx_PWMxOFF	0x1
+
+#define PWMxON_LENGTH		BIT(7)
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Read one TWL4030 GPIO module register: its byte, or the negative I2C status
+ */
+static inline int gpio_twl4030_read(u8 address)
+{
+	u8 value;
+	int err = twl4030_i2c_read_u8(TWL4030_MODULE_GPIO, &value, address);
+
+	/* a failed transfer wins; otherwise hand back the register byte */
+	return (err < 0) ? err : value;
+}
+
+/*----------------------------------------------------------------------*/
+
+static u8 cached_leden;		/* protected by gpio_lock */
+
+/* The LED lines are open drain outputs ... a FET pulls to GND, so an
+ * external pullup is needed.  We could also expose the integrated PWM
+ * as a LED brightness control; we initialize it as "always on".
+ */
+static void twl4030_led_set_value(int led, int value)
+{
+	u8 mask = LEDEN_LEDAON | LEDEN_LEDAPWM;
+	int status;
+
+	if (led)		/* LED B's bits sit one above LED A's */
+		mask <<= 1;
+
+	/* value != 0 clears the ON/PWM bits (not driven); 0 sets them */
+	mutex_lock(&gpio_lock);
+	cached_leden = value ? (cached_leden & ~mask) : (cached_leden | mask);
+	status = twl4030_i2c_write_u8(TWL4030_MODULE_LED, cached_leden,
+			TWL4030_LED_LEDEN);
+	mutex_unlock(&gpio_lock);
+	if (status < 0)		/* void hook: reporting is all we can do */
+		pr_debug("twl4030_gpio: LEDEN write failed, %d\n", status);
+}
+
+/* Set one GPIO's direction bit: cleared for input, set for output. */
+static int twl4030_set_gpio_direction(int gpio, int is_input)
+{
+	const u8 pin_bit = BIT(gpio & 0x7);
+	const u8 dir_reg = REG_GPIODATADIR1 + (gpio >> 3);
+	int status;
+
+	/* read-modify-write of the bank register, serialized by gpio_lock */
+	mutex_lock(&gpio_lock);
+	status = gpio_twl4030_read(dir_reg);
+	if (status < 0)
+		goto unlock;
+
+	if (is_input)
+		status = gpio_twl4030_write(dir_reg, status & ~pin_bit);
+	else
+		status = gpio_twl4030_write(dir_reg, status | pin_bit);
+unlock:
+	mutex_unlock(&gpio_lock);
+	return status;
+}
+
+/* Drive an output pin via its bank's SET (enable) or CLEAR register. */
+static int twl4030_set_gpio_dataout(int gpio, int enable)
+{
+	const u8 bank = gpio >> 3;
+	const u8 pin_bit = BIT(gpio & 0x7);
+	u8 first_reg;
+
+	/* only this pin's bit is written; nothing is read back first */
+	first_reg = enable ? REG_SETGPIODATAOUT1 : REG_CLEARGPIODATAOUT1;
+
+	/* one register per bank of eight pins */
+	return gpio_twl4030_write(first_reg + bank, pin_bit);
+}
+
+static int twl4030_get_gpio_datain(int gpio)
+{
+	u8 d_bnk = gpio >> 3;
+	u8 d_off = gpio & 0x7;
+	u8 base = 0;
+	int ret = 0;
+
+	if (unlikely((gpio >= TWL4030_GPIO_MAX)
+		|| !(gpio_usage_count & BIT(gpio))))
+		return -EPERM;
+
+	base = REG_GPIODATAIN1 + d_bnk;
+	ret = gpio_twl4030_read(base);
+	if (ret > 0)
+		ret = (ret >> d_off) & 0x1;
+
+	return ret;
+}
+
+/*
+ * Enable or disable debouncing for one requested GPIO pin on TWL4030.
+ */
+int twl4030_set_gpio_debounce(int gpio, int enable)
+{
+	const u8 pin_bit = BIT(gpio & 0x7);
+	const u8 deben_reg = REG_GPIO_DEBEN1 + (gpio >> 3);
+	int status;
+
+	/* only pins that exist and were claimed via twl_request() qualify */
+	if (unlikely((gpio >= TWL4030_GPIO_MAX)
+		|| !(gpio_usage_count & BIT(gpio))))
+		return -EPERM;
+
+	/* read-modify-write of the bank's enable register under gpio_lock */
+	mutex_lock(&gpio_lock);
+	status = gpio_twl4030_read(deben_reg);
+	if (status < 0)
+		goto unlock;
+
+	if (enable)
+		status = gpio_twl4030_write(deben_reg, status | pin_bit);
+	else
+		status = gpio_twl4030_write(deben_reg, status & ~pin_bit);
+
+unlock:
+	mutex_unlock(&gpio_lock);
+	return status;
+}
+EXPORT_SYMBOL(twl4030_set_gpio_debounce);
+
+/*----------------------------------------------------------------------*/
+
+static int twl_request(struct gpio_chip *chip, unsigned offset)
+{
+	int status = 0;
+
+	mutex_lock(&gpio_lock);
+
+	/* Offsets >= TWL4030_GPIO_MAX are the two output-only LED lines. */
+	if (offset >= TWL4030_GPIO_MAX) {
+		u8	ledclr_mask = LEDEN_LEDAON | LEDEN_LEDAEXT
+				| LEDEN_LEDAPWM | LEDEN_PWM_LENGTHA;
+		u8	module = TWL4030_MODULE_PWMA;
+
+		offset -= TWL4030_GPIO_MAX;
+		if (offset) {	/* second LED: B bits and the PWMB module */
+			ledclr_mask <<= 1;
+			module = TWL4030_MODULE_PWMB;
+		}
+
+		/* initialize PWM to always-drive */
+		status = twl4030_i2c_write_u8(module, 0x7f,
+				TWL4030_PWMx_PWMxOFF);
+		if (status < 0)
+			goto done;
+		status = twl4030_i2c_write_u8(module, 0x7f,
+				TWL4030_PWMx_PWMxON);
+		if (status < 0)
+			goto done;
+
+		/* init LED to not-driven (high); this also refreshes the cache */
+		module = TWL4030_MODULE_LED;
+		status = twl4030_i2c_read_u8(module, &cached_leden,
+				TWL4030_LED_LEDEN);
+		if (status < 0)
+			goto done;
+		cached_leden &= ~ledclr_mask;
+		status = twl4030_i2c_write_u8(module, cached_leden,
+				TWL4030_LED_LEDEN);
+		if (status < 0)
+			goto done;
+
+		status = 0;	/* LED claims never touch gpio_usage_count */
+		goto done;
+	}
+
+	/* on first use, turn GPIO module "on" */
+	if (!gpio_usage_count) {
+		struct twl4030_gpio_platform_data *pdata;
+		u8 value = MASK_GPIO_CTRL_GPIO_ON;
+
+		/* optionally have the first two GPIOs switch vMMC1
+		 * and vMMC2 power supplies based on card presence.
+		 */
+		pdata = chip->dev->platform_data;
+		value |= pdata->mmc_cd & 0x03;
+
+		status = gpio_twl4030_write(REG_GPIO_CTRL, value);
+	}
+
+	if (!status)	/* record the claim unless the power-on write failed */
+		gpio_usage_count |= (0x1 << offset);
+
+done:
+	mutex_unlock(&gpio_lock);
+	return status;
+}
+
+/* gpio_chip.free hook: park an LED line, or drop a GPIO's usage bit. */
+static void twl_free(struct gpio_chip *chip, unsigned offset)
+{
+	const int is_led = (offset >= TWL4030_GPIO_MAX);
+
+	if (is_led) {
+		/* value 1 clears the LED's ON/PWM bits in LEDEN */
+		twl4030_led_set_value(offset - TWL4030_GPIO_MAX, 1);
+		return;
+	}
+
+	mutex_lock(&gpio_lock);
+	gpio_usage_count &= ~BIT(offset);
+	if (gpio_usage_count == 0)	/* last user gone: GPIO module off */
+		gpio_twl4030_write(REG_GPIO_CTRL, 0x0);
+	mutex_unlock(&gpio_lock);
+}
+
+static int twl_direction_in(struct gpio_chip *chip, unsigned offset)
+{
+	if (offset >= TWL4030_GPIO_MAX)
+		return -EINVAL;	/* the LED lines are output-only */
+	return twl4030_set_gpio_direction(offset, 1);
+}
+
+static int twl_get(struct gpio_chip *chip, unsigned offset)
+{
+	int level;
+
+	if (offset > TWL4030_GPIO_MAX)		/* second LED line */
+		level = cached_leden & LEDEN_LEDBON;
+	else if (offset == TWL4030_GPIO_MAX)	/* first LED line */
+		level = cached_leden & LEDEN_LEDAON;
+	else					/* a real GPIO pin */
+		level = twl4030_get_gpio_datain(offset);
+	return (level < 0) ? 0 : level;	/* errors read back as 0 */
+}
+
+static int twl_direction_out(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (offset >= TWL4030_GPIO_MAX) {
+		/* LED lines have no direction register; just apply the level */
+		twl4030_led_set_value(offset - TWL4030_GPIO_MAX, value);
+		return 0;
+	}
+	twl4030_set_gpio_dataout(offset, value);	/* level first ... */
+	return twl4030_set_gpio_direction(offset, 0);	/* ... then drive */
+}
+
+static void twl_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (offset >= TWL4030_GPIO_MAX)		/* one of the LED lines */
+		twl4030_led_set_value(offset - TWL4030_GPIO_MAX, value);
+	else					/* an ordinary GPIO pin */
+		twl4030_set_gpio_dataout(offset, value);
+}
+
+static int twl_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	if (!twl4030_gpio_irq_base || offset >= TWL4030_GPIO_MAX)
+		return -EINVAL;		/* IRQs not set up, or an LED line */
+	return twl4030_gpio_irq_base + offset;
+}
+
+static struct gpio_chip twl_gpiochip = {	/* base, ngpio, dev: set in probe */
+	.label			= "twl4030",
+	.owner			= THIS_MODULE,
+	.request		= twl_request,
+	.free			= twl_free,
+	.direction_input	= twl_direction_in,
+	.get			= twl_get,
+	.direction_output	= twl_direction_out,
+	.set			= twl_set,
+	.to_irq			= twl_to_irq,
+	.can_sleep		= 1,	/* accessors take gpio_lock and do I2C */
+};
+
+/*----------------------------------------------------------------------*/
+
+static int __devinit gpio_twl4030_pulls(u32 ups, u32 downs)
+{
+	u8		message[6];	/* [0] is not filled in here; presumably reserved for twl4030_i2c_write() - TODO confirm */
+	unsigned	i, gpio_bit;	/* gpio_bit walks one bit per pin across both masks */
+
+	/* For most pins, a pulldown was enabled by default.
+	 * We should have data that's specific to this board.
+	 */
+	for (gpio_bit = 1, i = 1; i < 6; i++) {
+		u8		bit_mask;
+		unsigned	j;
+
+		for (bit_mask = 0, j = 0; j < 8; j += 2, gpio_bit <<= 1) {
+			if (ups & gpio_bit)
+				bit_mask |= 1 << (j + 1);	/* odd bit: pullup (wins) */
+			else if (downs & gpio_bit)
+				bit_mask |= 1 << (j + 0);	/* even bit: pulldown */
+		}
+		message[i] = bit_mask;	/* four pins per byte, two bits each */
+	}
+
+	return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
+				REG_GPIOPUPDCTR1, 5);
+}
+
+static int gpio_twl4030_remove(struct platform_device *pdev);
+
+static int __devinit gpio_twl4030_probe(struct platform_device *pdev)
+{
+	struct twl4030_gpio_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	/* maybe setup IRQs: only when the board data supplies an irq_base */
+	if (pdata->irq_base) {
+		if (is_module()) {
+			dev_err(&pdev->dev,
+				"can't dispatch IRQs from modules\n");
+			goto no_irqs;	/* carry on without IRQ support */
+		}
+		ret = twl4030_sih_setup(TWL4030_MODULE_GPIO);
+		if (ret < 0)
+			return ret;
+		WARN_ON(ret != pdata->irq_base);	/* board data disagrees */
+		twl4030_gpio_irq_base = ret;
+	}
+
+no_irqs:
+	/*
+	 * NOTE:  boards may waste power if they don't set pullups
+	 * and pulldowns correctly ... default for non-ULPI pins is
+	 * pulldown, and some other pins may have external pullups
+	 * or pulldowns.  Careful!
+	 */
+	ret = gpio_twl4030_pulls(pdata->pullups, pdata->pulldowns);
+	if (ret)	/* pull setup failure is logged, not fatal */
+		dev_dbg(&pdev->dev, "pullups %.05x %.05x --> %d\n",
+				pdata->pullups, pdata->pulldowns,
+				ret);
+
+	twl_gpiochip.base = pdata->gpio_base;
+	twl_gpiochip.ngpio = TWL4030_GPIO_MAX;
+	twl_gpiochip.dev = &pdev->dev;
+
+	/* NOTE: we assume VIBRA_CTL.VIBRA_EN, in MODULE_AUDIO_VOICE,
+	 * is (still) clear if use_leds is set.
+	 */
+	if (pdata->use_leds)
+		twl_gpiochip.ngpio += 2;	/* the two LED lines follow the GPIOs */
+
+	ret = gpiochip_add(&twl_gpiochip);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+				"could not register gpiochip, %d\n",
+				ret);
+		twl_gpiochip.ngpio = 0;
+		gpio_twl4030_remove(pdev);	/* NOTE(review): __devexit fn; also runs teardown - confirm */
+	} else if (pdata->setup) {
+		int status;
+
+		status = pdata->setup(&pdev->dev,
+				pdata->gpio_base, TWL4030_GPIO_MAX);
+		if (status)	/* board hook failure is only logged */
+			dev_dbg(&pdev->dev, "setup --> %d\n", status);
+	}
+
+	return ret;
+}
+
+/* Undo probe: board teardown hook first, then unregister the gpio_chip. */
+static int __devexit gpio_twl4030_remove(struct platform_device *pdev)
+{
+	struct twl4030_gpio_platform_data *board = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	int err = 0;
+
+	if (board->teardown)
+		err = board->teardown(dev, board->gpio_base, TWL4030_GPIO_MAX);
+	if (err) {
+		dev_dbg(dev, "teardown --> %d\n", err);
+		return err;
+	}
+
+	err = gpiochip_remove(&twl_gpiochip);
+	if (err < 0)
+		return err;
+
+	if (is_module())	/* modular builds never set up IRQs in probe */
+		return 0;
+
+	/* REVISIT no support yet for deregistering all the IRQs */
+	WARN_ON(1);
+	return -EIO;
+}
+
+/* Note:  this hardware lives inside an I2C-based multi-function device. */
+MODULE_ALIAS("platform:twl4030_gpio");
+
+static struct platform_driver gpio_twl4030_driver = {
+	.driver.name	= "twl4030_gpio",
+	.driver.owner	= THIS_MODULE,
+	.probe		= gpio_twl4030_probe,
+	.remove		= __devexit_p(gpio_twl4030_remove),
+};
+
+static int __init gpio_twl4030_init(void)
+{
+	return platform_driver_register(&gpio_twl4030_driver);
+}
+subsys_initcall(gpio_twl4030_init);
+
+static void __exit gpio_twl4030_exit(void)
+{
+	platform_driver_unregister(&gpio_twl4030_driver);
+}
+module_exit(gpio_twl4030_exit);
+
+MODULE_AUTHOR("Texas Instruments, Inc.");
+MODULE_DESCRIPTION("GPIO interface for TWL4030");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 9097500de5f..a8b33c2ec8d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && SHMEM
+	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index d490db4c0de..ae73b7f7249 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -522,12 +522,12 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
 	struct drm_gem_object *obj = ptr;
 	struct drm_gem_name_info_data   *nid = data;
 
-	DRM_INFO("name %d size %d\n", obj->name, obj->size);
+	DRM_INFO("name %d size %zd\n", obj->name, obj->size);
 	if (nid->eof)
 		return 0;
 
 	nid->len += sprintf(&nid->buf[nid->len],
-			    "%6d%9d%8d%9d\n",
+			    "%6d %8zd %7d %8d\n",
 			    obj->name, obj->size,
 			    atomic_read(&obj->handlecount.refcount),
 			    atomic_read(&obj->refcount.refcount));
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd1b42..dc2e6fdb6ca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,6 +171,37 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/*
+ * Try to write l bytes from user_data at offset o of page pfn using an
+ * atomic kmap. Return true on success; always 0 without CONFIG_HIGHMEM.
+ * If this fails (which includes a partial write), we'll redo the whole
+ * thing with the slow version.
+ *
+ * This is a workaround for the low performance of iounmap (approximate
+ * 10% cpu cost on normal 3D workloads).  kmap_atomic on HIGHMEM kernels
+ * happens to let us map card memory without taking IPIs.  When the vmap
+ * rework lands we should be able to dump this hack.
+ */
+static inline int fast_user_write(unsigned long pfn, char __user *user_data,
+				  int l, int o)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long unwritten;
+	char *vaddr_atomic;
+
+	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
+#if WATCH_PWRITE
+	DRM_INFO("pwrite o %d l %d pfn %ld vaddr %p\n",
+		 o, l, pfn, vaddr_atomic);
+#endif
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
+	kunmap_atomic(vaddr_atomic, KM_USER0);
+	return !unwritten;	/* any uncopied byte counts as failure */
+#else
+	return 0;		/* no fast path: caller uses ioremap_wc */
+#endif
+}
+
 static int
 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_i915_gem_pwrite *args,
@@ -180,12 +211,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	char __iomem *vaddr;
-	char *vaddr_atomic;
-	int i, o, l;
 	int ret = 0;
-	unsigned long pfn;
-	unsigned long unwritten;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -209,6 +235,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
+		unsigned long pfn;
+		int i, o, l;
+
 		/* Operation in this page
 		 *
 		 * i = page number
@@ -223,25 +252,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-#ifdef CONFIG_HIGHMEM
-		/* This is a workaround for the low performance of iounmap
-		 * (approximate 10% cpu cost on normal 3D workloads).
-		 * kmap_atomic on HIGHMEM kernels happens to let us map card
-		 * memory without taking IPIs.  When the vmap rework lands
-		 * we should be able to dump this hack.
-		 */
-		vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-		DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-			 i, o, l, pfn, vaddr_atomic);
-#endif
-		unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-							      user_data, l);
-		kunmap_atomic(vaddr_atomic, KM_USER0);
+		if (!fast_user_write(pfn, user_data, l, o)) {
+			unsigned long unwritten;
+			char __iomem *vaddr;
 
-		if (unwritten)
-#endif /* CONFIG_HIGHMEM */
-		{
 			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
 #if WATCH_PWRITE
 			DRM_INFO("pwrite slow i %d o %d l %d "
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b06b8e090a2..bc011da79e1 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -49,6 +49,9 @@
 
 #define APPLESMC_MAX_DATA_LENGTH 32
 
+#define APPLESMC_MIN_WAIT	0x0040
+#define APPLESMC_MAX_WAIT	0x8000
+
 #define APPLESMC_STATUS_MASK	0x0f
 #define APPLESMC_READ_CMD	0x10
 #define APPLESMC_WRITE_CMD	0x11
@@ -57,8 +60,8 @@
 
 #define KEY_COUNT_KEY		"#KEY" /* r-o ui32 */
 
-#define LIGHT_SENSOR_LEFT_KEY	"ALV0" /* r-o {alv (6 bytes) */
-#define LIGHT_SENSOR_RIGHT_KEY	"ALV1" /* r-o {alv (6 bytes) */
+#define LIGHT_SENSOR_LEFT_KEY	"ALV0" /* r-o {alv (6-10 bytes) */
+#define LIGHT_SENSOR_RIGHT_KEY	"ALV1" /* r-o {alv (6-10 bytes) */
 #define BACKLIGHT_KEY		"LKSB" /* w-o {lkb (2 bytes) */
 
 #define CLAMSHELL_KEY		"MSLD" /* r-o ui8 (unused) */
@@ -104,6 +107,15 @@ static const char* temperature_sensors_sets[][36] = {
 /* Set 6: Macbook3 set */
 	{ "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H",
 	  "Th0S", "Th1H", NULL },
+/* Set 7: Macbook Air */
+	{ "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
+	  "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
+/* Set 8: Macbook Pro 4,1 (Penryn) */
+	{ "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
+	  "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
+/* Set 9: Macbook Pro 3,1 (Santa Rosa) */
+	{ "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P",
+	  "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -163,25 +175,25 @@ static unsigned int key_at_index;
 static struct workqueue_struct *applesmc_led_wq;
 
 /*
- * __wait_status - Wait up to 2ms for the status port to get a certain value
+ * __wait_status - Wait up to 32ms for the status port to get a certain value
  * (masked with 0x0f), returning zero if the value is obtained.  Callers must
  * hold applesmc_lock.
  */
 static int __wait_status(u8 val)
 {
-	unsigned int i;
+	int us;
 
 	val = val & APPLESMC_STATUS_MASK;
 
-	for (i = 0; i < 200; i++) {
+	for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+		udelay(us);
 		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
 			if (debug)
 				printk(KERN_DEBUG
-						"Waited %d us for status %x\n",
-						i*10, val);
+					"Waited %d us for status %x\n",
+					2 * us - APPLESMC_MIN_WAIT, val);
 			return 0;
 		}
-		udelay(10);
 	}
 
 	printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n",
@@ -191,6 +203,25 @@ static int __wait_status(u8 val)
 }
 
 /*
+ * special treatment of command port - on newer macbooks, it seems necessary
+ * to resend the command byte before polling the status again. Callers must
+ * hold applesmc_lock.
+ */
+static int send_command(u8 cmd)
+{
+	int us;	/* delay before the next status poll, in microseconds */
+	for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+		outb(cmd, APPLESMC_CMD_PORT);	/* (re)send on every attempt */
+		udelay(us);			/* wait doubles each retry */
+		if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
+			return 0;
+	}
+	printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
+		cmd, inb(APPLESMC_CMD_PORT));
+	return -EIO;	/* status never reached 0x0c within the retry budget */
+}
+
+/*
  * applesmc_read_key - reads len bytes from a given key, and put them in buffer.
  * Returns zero on success or a negative error on failure. Callers must
  * hold applesmc_lock.
@@ -205,8 +236,7 @@ static int applesmc_read_key(const char* key, u8* buffer, u8 len)
 		return -EINVAL;
 	}
 
-	outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_READ_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -249,8 +279,7 @@ static int applesmc_write_key(const char* key, u8* buffer, u8 len)
 		return -EINVAL;
 	}
 
-	outb(APPLESMC_WRITE_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_WRITE_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -284,8 +313,7 @@ static int applesmc_get_key_at_index(int index, char* key)
 	readkey[2] = index >> 8;
 	readkey[3] = index;
 
-	outb(APPLESMC_GET_KEY_BY_INDEX_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_GET_KEY_BY_INDEX_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -315,8 +343,7 @@ static int applesmc_get_key_type(char* key, char* type)
 {
 	int i;
 
-	outb(APPLESMC_GET_KEY_TYPE_CMD, APPLESMC_CMD_PORT);
-	if (__wait_status(0x0c))
+	if (send_command(APPLESMC_GET_KEY_TYPE_CMD))
 		return -EIO;
 
 	for (i = 0; i < 4; i++) {
@@ -325,7 +352,7 @@ static int applesmc_get_key_type(char* key, char* type)
 			return -EIO;
 	}
 
-	outb(5, APPLESMC_DATA_PORT);
+	outb(6, APPLESMC_DATA_PORT);
 
 	for (i = 0; i < 6; i++) {
 		if (__wait_status(0x05))
@@ -527,17 +554,27 @@ out:
 static ssize_t applesmc_light_show(struct device *dev,
 				struct device_attribute *attr, char *sysfsbuf)
 {
+	static int data_length;
 	int ret;
 	u8 left = 0, right = 0;
-	u8 buffer[6];
+	u8 buffer[10], query[6];
 
 	mutex_lock(&applesmc_lock);
 
-	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, 6);
+	if (!data_length) {
+		ret = applesmc_get_key_type(LIGHT_SENSOR_LEFT_KEY, query);
+		if (ret)
+			goto out;
+		data_length = clamp_val(query[0], 0, 10);
+		printk(KERN_INFO "applesmc: light sensor data length set to "
+			"%d\n", data_length);
+	}
+
+	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
 	left = buffer[2];
 	if (ret)
 		goto out;
-	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, 6);
+	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
 	right = buffer[2];
 
 out:
@@ -1233,39 +1270,57 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
 	{ .accelerometer = 0, .light = 0, .temperature_set = 5 },
 /* MacBook3: accelerometer and temperature set 6 */
 	{ .accelerometer = 1, .light = 0, .temperature_set = 6 },
+/* MacBook Air: accelerometer, backlight and temperature set 7 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 7 },
+/* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 8 },
+/* MacBook Pro 3: accelerometer, backlight and temperature set 9 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 9 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
  * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
 static __initdata struct dmi_system_id applesmc_whitelist[] = {
+	{ applesmc_dmi_match, "Apple MacBook Air", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
+		&applesmc_dmi_data[7]},
+	{ applesmc_dmi_match, "Apple MacBook Pro 4", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
+		&applesmc_dmi_data[8]},
+	{ applesmc_dmi_match, "Apple MacBook Pro 3", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3") },
+		&applesmc_dmi_data[9]},
 	{ applesmc_dmi_match, "Apple MacBook Pro", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-		(void*)&applesmc_dmi_data[0]},
+		&applesmc_dmi_data[0]},
 	{ applesmc_dmi_match, "Apple MacBook (v2)", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") },
-		(void*)&applesmc_dmi_data[1]},
+		&applesmc_dmi_data[1]},
 	{ applesmc_dmi_match, "Apple MacBook (v3)", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") },
-		(void*)&applesmc_dmi_data[6]},
+		&applesmc_dmi_data[6]},
 	{ applesmc_dmi_match, "Apple MacBook", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") },
-		(void*)&applesmc_dmi_data[2]},
+		&applesmc_dmi_data[2]},
 	{ applesmc_dmi_match, "Apple Macmini", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") },
-		(void*)&applesmc_dmi_data[3]},
+		&applesmc_dmi_data[3]},
 	{ applesmc_dmi_match, "Apple MacPro2", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") },
-		(void*)&applesmc_dmi_data[4]},
+		&applesmc_dmi_data[4]},
 	{ applesmc_dmi_match, "Apple iMac", {
 	  DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"iMac") },
-		(void*)&applesmc_dmi_data[5]},
+		&applesmc_dmi_data[5]},
 	{ .ident = NULL }
 };
 
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 9b462bb13fa..5fbfa34c110 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -75,7 +75,8 @@ MODULE_PARM_DESC(force_id, "Override the detected device ID");
 #define FSCM	0x09	/* Logical device: fans */
 #define VLM	0x0d	/* Logical device: voltages */
 #define TMS	0x0e	/* Logical device: temperatures */
-static const u8 logdev[3] = { FSCM, VLM, TMS };
+#define LDNI_MAX 3
+static const u8 logdev[LDNI_MAX] = { FSCM, VLM, TMS };
 
 #define LD_FAN		0
 #define LD_IN		1
@@ -489,11 +490,66 @@ static struct sensor_device_attribute in_max[] = {
 	SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10),
 };
 
+/* (temp & vin) channel status register alarm bits (pdf sec.11.5.12) */
+#define CHAN_ALM_MIN	0x02	/* min limit crossed */
+#define CHAN_ALM_MAX	0x04	/* max limit exceeded */
+#define TEMP_ALM_CRIT	0x08	/* temp crit exceeded (temp only) */
+
+/* show_in_min/max_alarm() reads data from the per-channel status
+   register (sec 11.5.12), not the vin event status registers (sec
+   11.5.2) that (legacy) show_in_alarm() reads (via data->in_alarms) */
+
+static ssize_t show_in_min_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_in_max_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+
+static struct sensor_device_attribute in_min_alarm[] = {
+	SENSOR_ATTR(in0_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 0),
+	SENSOR_ATTR(in1_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 1),
+	SENSOR_ATTR(in2_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 2),
+	SENSOR_ATTR(in3_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 3),
+	SENSOR_ATTR(in4_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 4),
+	SENSOR_ATTR(in5_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 5),
+	SENSOR_ATTR(in6_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 6),
+	SENSOR_ATTR(in7_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 7),
+	SENSOR_ATTR(in8_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 8),
+	SENSOR_ATTR(in9_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 9),
+	SENSOR_ATTR(in10_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 10),
+};
+static struct sensor_device_attribute in_max_alarm[] = {
+	SENSOR_ATTR(in0_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 0),
+	SENSOR_ATTR(in1_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 1),
+	SENSOR_ATTR(in2_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 2),
+	SENSOR_ATTR(in3_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 3),
+	SENSOR_ATTR(in4_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 4),
+	SENSOR_ATTR(in5_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 5),
+	SENSOR_ATTR(in6_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 6),
+	SENSOR_ATTR(in7_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 7),
+	SENSOR_ATTR(in8_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 8),
+	SENSOR_ATTR(in9_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 9),
+	SENSOR_ATTR(in10_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 10),
+};
+
 #define VIN_UNIT_ATTRS(X) \
 	&in_input[X].dev_attr.attr,	\
 	&in_status[X].dev_attr.attr,	\
 	&in_min[X].dev_attr.attr,	\
-	&in_max[X].dev_attr.attr
+	&in_max[X].dev_attr.attr,	\
+	&in_min_alarm[X].dev_attr.attr,	\
+	&in_max_alarm[X].dev_attr.attr
 
 static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -658,12 +714,68 @@ static struct sensor_device_attribute therm_crit[] = {
 		    show_therm_crit, set_therm_crit, 2+11),
 };
 
+/* show_therm_min/max_alarm() reads data from the per-channel voltage
+   status register (sec 11.5.12) */
+
+static ssize_t show_therm_min_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_therm_max_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_therm_crit_alarm(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->in_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute therm_min_alarm[] = {
+	SENSOR_ATTR(temp4_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_min_alarm, S_IRUGO,
+		    show_therm_min_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_max_alarm[] = {
+	SENSOR_ATTR(temp4_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_max_alarm, S_IRUGO,
+		    show_therm_max_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_crit_alarm[] = {
+	SENSOR_ATTR(temp4_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 0+11),
+	SENSOR_ATTR(temp5_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 1+11),
+	SENSOR_ATTR(temp6_crit_alarm, S_IRUGO,
+		    show_therm_crit_alarm, NULL, 2+11),
+};
+
 #define THERM_UNIT_ATTRS(X) \
 	&therm_input[X].dev_attr.attr,	\
 	&therm_status[X].dev_attr.attr,	\
 	&therm_min[X].dev_attr.attr,	\
 	&therm_max[X].dev_attr.attr,	\
-	&therm_crit[X].dev_attr.attr
+	&therm_crit[X].dev_attr.attr,	\
+	&therm_min_alarm[X].dev_attr.attr, \
+	&therm_max_alarm[X].dev_attr.attr, \
+	&therm_crit_alarm[X].dev_attr.attr
 
 static struct attribute * pc8736x_therm_attr_array[] = {
 	THERM_UNIT_ATTRS(0),
@@ -790,12 +902,76 @@ static ssize_t show_temp_alarms(struct device *dev, struct device_attribute *att
 }
 static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL);
 
+/* show_temp_min/max_alarm() reads data from the per-channel status
+   register (sec 12.3.7), not the temp event status registers (sec
+   12.3.2) that show_temp_alarms() reads (via data->temp_alarms) */
+
+static ssize_t show_temp_min_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_temp_max_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_temp_crit_alarm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute temp_min_alarm[] = {
+	SENSOR_ATTR(temp1_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_max_alarm[] = {
+	SENSOR_ATTR(temp1_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_crit_alarm[] = {
+	SENSOR_ATTR(temp1_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 0),
+	SENSOR_ATTR(temp2_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 1),
+	SENSOR_ATTR(temp3_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 2),
+};
+
+#define TEMP_FAULT	0x40	/* open diode */
+static ssize_t show_temp_fault(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	struct pc87360_data *data = pc87360_update_device(dev);
+	unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+	return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_FAULT));
+}
+static struct sensor_device_attribute temp_fault[] = {
+	SENSOR_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0),
+	SENSOR_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1),
+	SENSOR_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2),
+};
+
 #define TEMP_UNIT_ATTRS(X) \
 	&temp_input[X].dev_attr.attr,	\
 	&temp_status[X].dev_attr.attr,	\
 	&temp_min[X].dev_attr.attr,	\
 	&temp_max[X].dev_attr.attr,	\
-	&temp_crit[X].dev_attr.attr
+	&temp_crit[X].dev_attr.attr,	\
+	&temp_min_alarm[X].dev_attr.attr, \
+	&temp_max_alarm[X].dev_attr.attr, \
+	&temp_crit_alarm[X].dev_attr.attr, \
+	&temp_fault[X].dev_attr.attr
 
 static struct attribute * pc8736x_temp_attr_array[] = {
 	TEMP_UNIT_ATTRS(0),
@@ -809,8 +985,8 @@ static const struct attribute_group pc8736x_temp_group = {
 	.attrs = pc8736x_temp_attr_array,
 };
 
-static ssize_t show_name(struct device *dev, struct device_attribute
-			 *devattr, char *buf)
+static ssize_t show_name(struct device *dev,
+			struct device_attribute *devattr, char *buf)
 {
 	struct pc87360_data *data = dev_get_drvdata(dev);
 	return sprintf(buf, "%s\n", data->name);
@@ -955,7 +1131,7 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
 	mutex_init(&data->update_lock);
 	platform_set_drvdata(pdev, data);
 
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < LDNI_MAX; i++) {
 		if (((data->address[i] = extra_isa[i]))
 		 && !request_region(extra_isa[i], PC87360_EXTENT,
 		 		    pc87360_driver.driver.name)) {
@@ -1031,7 +1207,15 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
 			    || (err = device_create_file(dev,
 					&temp_crit[i].dev_attr))
 			    || (err = device_create_file(dev,
-					&temp_status[i].dev_attr)))
+					&temp_status[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_min_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_max_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_crit_alarm[i].dev_attr))
+			    || (err = device_create_file(dev,
+					&temp_fault[i].dev_attr)))
 				goto ERROR3;
 		}
 		if ((err = device_create_file(dev, &dev_attr_alarms_temp)))
@@ -1131,6 +1315,16 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
 	mutex_unlock(&(data->lock));
 }
 
+/* (temp & vin) channel conversion status register flags (pdf sec.11.5.12) */
+#define CHAN_CNVRTD	0x80	/* new data ready */
+#define CHAN_ENA	0x01	/* enabled channel (temp or vin) */
+#define CHAN_ALM_ENA	0x10	/* propagate to alarms-reg ?? (chk val!) */
+#define CHAN_READY	(CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
+
+#define TEMP_OTS_OE	0x20	/* OTS Output Enable */
+#define VIN_RW1C_MASK	(CHAN_READY|CHAN_ALM_MAX|CHAN_ALM_MIN)   /* 0x87 */
+#define TEMP_RW1C_MASK	(VIN_RW1C_MASK|TEMP_ALM_CRIT|TEMP_FAULT) /* 0xCF */
+
 static void pc87360_init_device(struct platform_device *pdev,
 				int use_thermistors)
 {
@@ -1152,11 +1346,12 @@ static void pc87360_init_device(struct platform_device *pdev,
 
 	nr = data->innr < 11 ? data->innr : 11;
 	for (i = 0; i < nr; i++) {
+		reg = pc87360_read_value(data, LD_IN, i,
+					 PC87365_REG_IN_STATUS);
+		dev_dbg(&pdev->dev, "bios in%d status:0x%02x\n", i, reg);
 		if (init >= init_in[i]) {
 			/* Forcibly enable voltage channel */
-			reg = pc87360_read_value(data, LD_IN, i,
-						 PC87365_REG_IN_STATUS);
-			if (!(reg & 0x01)) {
+			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling in%d\n", i);
 				pc87360_write_value(data, LD_IN, i,
@@ -1168,19 +1363,24 @@ static void pc87360_init_device(struct platform_device *pdev,
 
 	/* We can't blindly trust the Super-I/O space configuration bit,
 	   most BIOS won't set it properly */
+	dev_dbg(&pdev->dev, "bios thermistors:%d\n", use_thermistors);
 	for (i = 11; i < data->innr; i++) {
 		reg = pc87360_read_value(data, LD_IN, i,
 					 PC87365_REG_TEMP_STATUS);
-		use_thermistors = use_thermistors || (reg & 0x01);
+		use_thermistors = use_thermistors || (reg & CHAN_ENA);
+		/* thermistors are temp[4-6], measured on vin[11-13] */
+		dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i-7, reg);
 	}
+	dev_dbg(&pdev->dev, "using thermistors:%d\n", use_thermistors);
 
 	i = use_thermistors ? 2 : 0;
 	for (; i < data->tempnr; i++) {
+		reg = pc87360_read_value(data, LD_TEMP, i,
+					 PC87365_REG_TEMP_STATUS);
+		dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i+1, reg);
 		if (init >= init_temp[i]) {
 			/* Forcibly enable temperature channel */
-			reg = pc87360_read_value(data, LD_TEMP, i,
-						 PC87365_REG_TEMP_STATUS);
-			if (!(reg & 0x01)) {
+			if (!(reg & CHAN_ENA)) {
 				dev_dbg(&pdev->dev, "Forcibly "
 					"enabling temp%d\n", i+1);
 				pc87360_write_value(data, LD_TEMP, i,
@@ -1197,7 +1397,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 				   diodes */
 				reg = pc87360_read_value(data, LD_TEMP,
 				      (i-11)/2, PC87365_REG_TEMP_STATUS);
-				if (reg & 0x01) {
+				if (reg & CHAN_ENA) {
 					dev_dbg(&pdev->dev, "Skipping "
 						"temp%d, pin already in use "
 						"by temp%d\n", i-7, (i-11)/2);
@@ -1207,7 +1407,7 @@ static void pc87360_init_device(struct platform_device *pdev,
 				/* Forcibly enable thermistor channel */
 				reg = pc87360_read_value(data, LD_IN, i,
 							 PC87365_REG_IN_STATUS);
-				if (!(reg & 0x01)) {
+				if (!(reg & CHAN_ENA)) {
 					dev_dbg(&pdev->dev, "Forcibly "
 						"enabling temp%d\n", i-7);
 					pc87360_write_value(data, LD_IN, i,
@@ -1221,7 +1421,8 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->innr) {
 		reg = pc87360_read_value(data, LD_IN, NO_BANK,
 					 PC87365_REG_IN_CONFIG);
-		if (reg & 0x01) {
+		dev_dbg(&pdev->dev, "bios vin-cfg:0x%02x\n", reg);
+		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly "
 				"enabling monitoring (VLM)\n");
 			pc87360_write_value(data, LD_IN, NO_BANK,
@@ -1233,7 +1434,8 @@ static void pc87360_init_device(struct platform_device *pdev,
 	if (data->tempnr) {
 		reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
 					 PC87365_REG_TEMP_CONFIG);
-		if (reg & 0x01) {
+		dev_dbg(&pdev->dev, "bios temp-cfg:0x%02x\n", reg);
+		if (reg & CHAN_ENA) {
 			dev_dbg(&pdev->dev, "Forcibly enabling "
 				"monitoring (TMS)\n");
 			pc87360_write_value(data, LD_TEMP, NO_BANK,
@@ -1336,11 +1538,11 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
 			pc87360_write_value(data, LD_IN, i,
 					    PC87365_REG_IN_STATUS,
 					    data->in_status[i]);
-			if ((data->in_status[i] & 0x81) == 0x81) {
+			if ((data->in_status[i] & CHAN_READY) == CHAN_READY) {
 				data->in[i] = pc87360_read_value(data, LD_IN,
 					      i, PC87365_REG_IN);
 			}
-			if (data->in_status[i] & 0x01) {
+			if (data->in_status[i] & CHAN_ENA) {
 				data->in_min[i] = pc87360_read_value(data,
 						  LD_IN, i,
 						  PC87365_REG_IN_MIN);
@@ -1373,12 +1575,12 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
 			pc87360_write_value(data, LD_TEMP, i,
 					    PC87365_REG_TEMP_STATUS,
 					    data->temp_status[i]);
-			if ((data->temp_status[i] & 0x81) == 0x81) {
+			if ((data->temp_status[i] & CHAN_READY) == CHAN_READY) {
 				data->temp[i] = pc87360_read_value(data,
 						LD_TEMP, i,
 						PC87365_REG_TEMP);
 			}
-			if (data->temp_status[i] & 0x01) {
+			if (data->temp_status[i] & CHAN_ENA) {
 				data->temp_min[i] = pc87360_read_value(data,
 						    LD_TEMP, i,
 						    PC87365_REG_TEMP_MIN);
diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index 1ea39254dac..424dad6f18d 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -332,10 +332,6 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
 	int error;
 	u8 temp;
 	
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data >= ARRAY_SIZE(chipname))
-		return -EINVAL;
-
 	if (amd756_ioport) {
 		dev_err(&pdev->dev, "Only one device supported "
 		       "(you have a strange motherboard, btw)\n");
@@ -412,7 +408,6 @@ static struct pci_driver amd756_driver = {
 	.id_table	= amd756_ids,
 	.probe		= amd756_probe,
 	.remove		= __devexit_p(amd756_remove),
-	.dynids.use_driver_data = 1,
 };
 
 static int __init amd756_init(void)
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 73dc52e114e..9f194d9efd9 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -332,10 +332,6 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
 	unsigned char temp;
 	int error = -ENODEV;
 
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data & 1 || id->driver_data > 0xff)
-		return -EINVAL;
-
 	/* Determine the address of the SMBus areas */
 	if (force_addr) {
 		vt596_smba = force_addr & 0xfff0;
@@ -483,7 +479,6 @@ static struct pci_driver vt596_driver = {
 	.name		= "vt596_smbus",
 	.id_table	= vt596_ids,
 	.probe		= vt596_probe,
-	.dynids.use_driver_data = 1,
 };
 
 static int __init i2c_vt596_init(void)
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
index 2a4acb26956..d4775528abc 100644
--- a/drivers/i2c/chips/at24.c
+++ b/drivers/i2c/chips/at24.c
@@ -460,7 +460,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	 */
 	at24->bin.attr.name = "eeprom";
 	at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
-	at24->bin.attr.owner = THIS_MODULE;
 	at24->bin.read = at24_bin_read;
 	at24->bin.size = chip.byte_len;
 
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 23be4d42cb0..f3ee4a1abb7 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -190,7 +190,6 @@ static struct bin_attribute ds1682_eeprom_attr = {
 	.attr = {
 		.name = "eeprom",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = DS1682_EEPROM_SIZE,
 	.read = ds1682_eeprom_read,
diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c
index 176126d3a01..4b364bae6b3 100644
--- a/drivers/i2c/chips/menelaus.c
+++ b/drivers/i2c/chips/menelaus.c
@@ -832,52 +832,52 @@ static irqreturn_t menelaus_irq(int irq, void *_menelaus)
 
 static void menelaus_to_time(char *regs, struct rtc_time *t)
 {
-	t->tm_sec = BCD2BIN(regs[0]);
-	t->tm_min = BCD2BIN(regs[1]);
+	t->tm_sec = bcd2bin(regs[0]);
+	t->tm_min = bcd2bin(regs[1]);
 	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
-		t->tm_hour = BCD2BIN(regs[2] & 0x1f) - 1;
+		t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
 		if (regs[2] & RTC_HR_PM)
 			t->tm_hour += 12;
 	} else
-		t->tm_hour = BCD2BIN(regs[2] & 0x3f);
-	t->tm_mday = BCD2BIN(regs[3]);
-	t->tm_mon = BCD2BIN(regs[4]) - 1;
-	t->tm_year = BCD2BIN(regs[5]) + 100;
+		t->tm_hour = bcd2bin(regs[2] & 0x3f);
+	t->tm_mday = bcd2bin(regs[3]);
+	t->tm_mon = bcd2bin(regs[4]) - 1;
+	t->tm_year = bcd2bin(regs[5]) + 100;
 }
 
 static int time_to_menelaus(struct rtc_time *t, int regnum)
 {
 	int	hour, status;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_sec));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_min));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
 	if (status < 0)
 		goto fail;
 
 	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
 		hour = t->tm_hour + 1;
 		if (hour > 12)
-			hour = RTC_HR_PM | BIN2BCD(hour - 12);
+			hour = RTC_HR_PM | bin2bcd(hour - 12);
 		else
-			hour = BIN2BCD(hour);
+			hour = bin2bcd(hour);
 	} else
-		hour = BIN2BCD(t->tm_hour);
+		hour = bin2bcd(t->tm_hour);
 	status = menelaus_write_reg(regnum++, hour);
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mday));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mon + 1));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
 	if (status < 0)
 		goto fail;
 
-	status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_year - 100));
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
 	if (status < 0)
 		goto fail;
 
@@ -914,7 +914,7 @@ static int menelaus_read_time(struct device *dev, struct rtc_time *t)
 	}
 
 	menelaus_to_time(regs, t);
-	t->tm_wday = BCD2BIN(regs[6]);
+	t->tm_wday = bcd2bin(regs[6]);
 
 	return 0;
 }
@@ -927,7 +927,7 @@ static int menelaus_set_time(struct device *dev, struct rtc_time *t)
 	status = time_to_menelaus(t, MENELAUS_RTC_SEC);
 	if (status < 0)
 		return status;
-	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, BIN2BCD(t->tm_wday));
+	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
 	if (status < 0) {
 		dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
 				"err %d\n", MENELAUS_RTC_WKDAY, status);
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 74a369a6116..a820ca6fc32 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -84,21 +84,40 @@ config BLK_DEV_IDE_SATA
 
 	  If unsure, say N.
 
-config BLK_DEV_IDEDISK
-	tristate "Include IDE/ATA-2 DISK support"
-	---help---
-	  This will include enhanced support for MFM/RLL/IDE hard disks.  If
-	  you have a MFM/RLL/IDE disk, and there is no special reason to use
-	  the old hard disk driver instead, say Y.  If you have an SCSI-only
-	  system, you can say N here.
+config IDE_GD
+	tristate "generic ATA/ATAPI disk support"
+	default y
+	help
+	  Support for ATA/ATAPI disks (including ATAPI floppy drives).
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-disk.
-	  Do not compile this driver as a module if your root file system
-	  (the one containing the directory /) is located on the IDE disk.
+	  To compile this driver as a module, choose M here.
+	  The module will be called ide-gd_mod.
+
+	  If unsure, say Y.
+
+config IDE_GD_ATA
+	bool "ATA disk support"
+	depends on IDE_GD
+	default y
+	help
+	  This will include support for ATA hard disks.
 
 	  If unsure, say Y.
 
+config IDE_GD_ATAPI
+	bool "ATAPI floppy support"
+	depends on IDE_GD
+	select IDE_ATAPI
+	help
+	  This will include support for ATAPI floppy drives
+	  (e.g. Iomega ZIP or MKE LS-120).
+
+	  For information about jumper settings and the question
+	  of when a ZIP drive uses a partition table, see
+	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
+
+	  If unsure, say N.
+
 config BLK_DEV_IDECS
 	tristate "PCMCIA IDE support"
 	depends on PCMCIA
@@ -163,29 +182,6 @@ config BLK_DEV_IDETAPE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ide-tape.
 
-config BLK_DEV_IDEFLOPPY
-	tristate "Include IDE/ATAPI FLOPPY support"
-	select IDE_ATAPI
-	---help---
-	  If you have an IDE floppy drive which uses the ATAPI protocol,
-	  answer Y.  ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
-	  drives, similar to the SCSI protocol.
-
-	  The LS-120 and the IDE/ATAPI Iomega ZIP drive are also supported by
-	  this driver. For information about jumper settings and the question
-	  of when a ZIP drive uses a partition table, see
-	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
-	  (ATAPI PD-CD/CDR drives are not supported by this driver; support
-	  for PD-CD/CDR drives is available if you answer Y to
-	  "SCSI emulation support", below).
-
-	  If you say Y here, the FLOPPY drive will be identified along with
-	  other IDE devices, as "hdb" or "hdc", or something similar (check
-	  the boot messages with dmesg).
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-floppy.
-
 config BLK_DEV_IDESCSI
 	tristate "SCSI emulation support (DEPRECATED)"
 	depends on SCSI
@@ -332,7 +328,7 @@ config IDEPCI_PCIBUS_ORDER
 # TODO: split it on per host driver config options (or module parameters)
 config BLK_DEV_OFFBOARD
 	bool "Boot off-board chipsets first support (DEPRECATED)"
-	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT34X || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
+	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
 	help
 	  Normally, IDE controllers built into the motherboard (on-board
 	  controllers) are assigned to ide0 and ide1 while those on add-in PCI
@@ -482,28 +478,6 @@ config BLK_DEV_CS5535
 
 	  It is safe to say Y to this question.
 
-config BLK_DEV_HPT34X
-	tristate "HPT34X chipset support"
-	depends on BROKEN
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds up to 4 more EIDE devices sharing a single
-	  interrupt. The HPT343 chipset in its current form is a non-bootable
-	  controller; the HPT345/HPT363 chipset is a bootable (needs BIOS FIX)
-	  PCI UDMA controllers. This driver requires dynamic tuning of the
-	  chipset during the ide-probe at boot time. It is reported to support
-	  DVD II drives, by the manufacturer.
-
-config HPT34X_AUTODMA
-	bool "HPT34X AUTODMA support (EXPERIMENTAL)"
-	depends on BLK_DEV_HPT34X && EXPERIMENTAL
-	help
-	  This is a dangerous thing to attempt currently! Please read the
-	  comments at the top of <file:drivers/ide/pci/hpt34x.c>.  If you say Y
-	  here, then say Y to "Use DMA by default when available" as well.
-
-	  If unsure, say N.
-
 config BLK_DEV_HPT366
 	tristate "HPT36X/37X chipset support"
 	select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index ceaf779054e..093d3248ca8 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,18 +37,25 @@ obj-$(CONFIG_IDE_H8300)			+= h8300/
 obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
-ide-disk_mod-y += ide-disk.o ide-disk_ioctl.o
+ide-gd_mod-y += ide-gd.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
 
+ifeq ($(CONFIG_IDE_GD_ATA), y)
+	ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
 ifeq ($(CONFIG_IDE_PROC_FS), y)
-	ide-disk_mod-y += ide-disk_proc.o
-	ide-floppy_mod-y += ide-floppy_proc.o
+	ide-gd_mod-y += ide-disk_proc.o
+endif
+endif
+
+ifeq ($(CONFIG_IDE_GD_ATAPI), y)
+	ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ifeq ($(CONFIG_IDE_PROC_FS), y)
+	ide-gd_mod-y += ide-floppy_proc.o
+endif
 endif
 
-obj-$(CONFIG_BLK_DEV_IDEDISK)		+= ide-disk_mod.o
+obj-$(CONFIG_IDE_GD)			+= ide-gd_mod.o
 obj-$(CONFIG_BLK_DEV_IDECD)		+= ide-cd_mod.o
-obj-$(CONFIG_BLK_DEV_IDEFLOPPY)		+= ide-floppy_mod.o
 obj-$(CONFIG_BLK_DEV_IDETAPE)		+= ide-tape.o
 
 ifeq ($(CONFIG_BLK_DEV_IDECS), y)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2e305714c20..4e58b9e7a58 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,7 +191,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 {
 	struct ide_atapi_pc pc;
 
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK)
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
 		return 0;
 
 	ide_init_pc(&pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3308b1cd3a3..13265a8827d 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -99,7 +99,7 @@ static void ide_cd_put(struct cdrom_info *cd)
 /* Mark that we've seen a media change and invalidate our internal buffers. */
 static void cdrom_saw_media_change(ide_drive_t *drive)
 {
-	drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 	drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
 }
 
@@ -340,8 +340,8 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 	}
 
 	ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, "
-		      "rq->cmd_type: 0x%x, err: 0x%x\n", __func__, stat,
-		      good_stat, rq->cmd_type, err);
+		      "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x, err: 0x%x\n",
+		      __func__, stat, good_stat, rq->cmd[0], rq->cmd_type, err);
 
 	if (blk_sense_request(rq)) {
 		/*
@@ -843,13 +843,10 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
 	rq->q->prep_rq_fn(rq->q, rq);
 }
 
-/*
- * All other packet commands.
- */
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+	ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
+		      __func__, rq->cmd[0]);
 
 	/*
 	 * Some of the trailing request sense fields are optional,
@@ -876,7 +873,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 	if (!sense)
 		sense = &local_sense;
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+	ide_debug_log(IDE_DBG_PC, "Call %s, cmd[0]: 0x%x, write: 0x%x, "
 		      "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write,
 		      timeout, cmd_flags);
 
@@ -1177,8 +1174,9 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	unsigned short sectors_per_frame =
 		queue_hardsect_size(drive->queue) >> SECTOR_BITS;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, write: 0x%x, secs_per_frame: %u\n",
-		      __func__, write, sectors_per_frame);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+		      "secs_per_frame: %u\n",
+		      __func__, rq->cmd[0], write, sectors_per_frame);
 
 	if (write) {
 		/* disk has become write protected */
@@ -1221,7 +1219,8 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd_type: 0x%x\n", __func__,
+	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x\n", __func__, rq->cmd[0],
 		      rq->cmd_type);
 
 	if (blk_pc_request(rq))
@@ -1257,9 +1256,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	}
 }
 
-/*
- * cdrom driver request routine.
- */
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
@@ -1267,8 +1263,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 	ide_handler_t *fn;
 	int xferlen;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd_type: 0x%x, block: %llu\n",
-		      __func__, rq->cmd_type, (unsigned long long)block);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x, block: %llu\n",
+		      __func__, rq->cmd[0], rq->cmd_type,
+		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
 		if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
@@ -1412,6 +1410,10 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 
 	*capacity = 1 + be32_to_cpu(capbuf.lba);
 	*sectors_per_frame = blocklen >> SECTOR_BITS;
+
+	ide_debug_log(IDE_DBG_PROBE, "%s: cap: %lu, sectors_per_frame: %lu\n",
+		      __func__, *capacity, *sectors_per_frame);
+
 	return 0;
 }
 
@@ -1643,6 +1645,9 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
 		maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
 	}
 
+	ide_debug_log(IDE_DBG_PROBE, "%s: curspeed: %u, maxspeed: %u\n",
+		      __func__, curspeed, maxspeed);
+
 	cd->current_speed = (curspeed + (176/2)) / 176;
 	cd->max_speed = (maxspeed + (176/2)) / 176;
 }
@@ -1732,7 +1737,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 		return 0;
 
 	if ((buf[8 + 6] & 0x01) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	if (buf[8 + 6] & 0x08)
 		drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
 	if (buf[8 + 3] & 0x01)
@@ -1777,7 +1782,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 	if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
 		printk(KERN_CONT " DVD%s%s",
 				 (cdi->mask & CDC_DVD_R) ? "" : "-R",
-				 (cdi->mask & CDC_DVD_RAM) ? "" : "-RAM");
+				 (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
 
 	if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
 		printk(KERN_CONT " CD%s%s",
@@ -1908,6 +1913,16 @@ static const struct ide_proc_devset idecd_settings[] = {
 	IDE_PROC_DEVSET(dsc_overlap, 0, 1),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
+{
+	return idecd_proc;
+}
+
+static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
+{
+	return idecd_settings;
+}
 #endif
 
 static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -1986,8 +2001,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 	if (!drive->queue->unplug_delay)
 		drive->queue->unplug_delay = 1;
 
-	drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT |
-		       ide_cd_flags(id);
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+	drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
 	if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
 	    fw_rev[4] == '1' && fw_rev[6] <= '2')
@@ -2069,8 +2084,8 @@ static ide_driver_t ide_cdrom_driver = {
 	.end_request		= ide_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idecd_proc,
-	.settings		= idecd_settings,
+	.proc_entries		= ide_cd_proc_entries,
+	.proc_devsets		= ide_cd_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 74231b41f61..df3df0041eb 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -86,8 +86,8 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
 
 	if (slot_nr == CDSL_CURRENT) {
 		(void) cdrom_check_status(drive, NULL);
-		retval = (drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED) ? 1 : 0;
-		drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+		retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
+		drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 		return retval;
 	} else {
 		return -EINVAL;
@@ -136,7 +136,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 		sense = &my_sense;
 
 	/* If the drive cannot lock the door, just pretend. */
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) {
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
 		stat = 0;
 	} else {
 		unsigned char cmd[BLK_MAX_CDB];
@@ -157,7 +157,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 	    (sense->asc == 0x24 || sense->asc == 0x20)) {
 		printk(KERN_ERR "%s: door locking not supported\n",
 			drive->name);
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 		stat = 0;
 	}
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3853bde8eed..223750c1b5a 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -14,9 +14,6 @@
  * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
  */
 
-#define IDEDISK_VERSION	"1.18"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -39,46 +36,8 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 
-#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
-#define IDE_DISK_MINORS		(1 << PARTN_BITS)
-#else
-#define IDE_DISK_MINORS		0
-#endif
-
 #include "ide-disk.h"
 
-static DEFINE_MUTEX(idedisk_ref_mutex);
-
-#define to_ide_disk(obj) container_of(obj, struct ide_disk_obj, kref)
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = NULL;
-
-	mutex_lock(&idedisk_ref_mutex);
-	idkp = ide_disk_g(disk);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			kref_get(&idkp->kref);
-	}
-	mutex_unlock(&idedisk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_disk_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&idedisk_ref_mutex);
-	kref_put(&idkp->kref, ide_disk_release);
-	ide_device_put(drive);
-	mutex_unlock(&idedisk_ref_mutex);
-}
-
 static const u8 ide_rw_cmds[] = {
 	ATA_CMD_READ_MULTI,
 	ATA_CMD_WRITE_MULTI,
@@ -374,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 	}
 }
 
-static void init_idedisk_capacity(ide_drive_t *drive)
+static int ide_disk_get_capacity(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int lba;
@@ -403,11 +362,28 @@ static void init_idedisk_capacity(ide_drive_t *drive)
 		if (ata_id_hpa_enabled(id))
 			idedisk_check_hpa(drive);
 	}
-}
 
-sector_t ide_disk_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
+	/* limit drive capacity to 137GB if LBA48 cannot be used */
+	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
+	    drive->capacity64 > 1ULL << 28) {
+		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
+		       "%llu sectors (%llu MB)\n",
+		       drive->name, (unsigned long long)drive->capacity64,
+		       sectors_to_MB(drive->capacity64));
+		drive->capacity64 = 1ULL << 28;
+	}
+
+	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
+	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
+		if (drive->capacity64 > 1ULL << 28) {
+			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
+					 " will be used for accessing sectors "
+					 "> %u\n", drive->name, 1 << 28);
+		} else
+			drive->dev_flags &= ~IDE_DFLAG_LBA48;
+	}
+
+	return 0;
 }
 
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
@@ -508,7 +484,7 @@ static void update_ordered(ide_drive_t *drive)
 		 * time we have trimmed the drive capacity if LBA48 is
 		 * not available so we don't need to recheck that.
 		 */
-		capacity = ide_disk_capacity(drive);
+		capacity = ide_gd_capacity(drive);
 		barrier = ata_id_flush_enabled(id) &&
 			(drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
 			((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
@@ -616,7 +592,12 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-static void idedisk_setup(ide_drive_t *drive)
+static int ide_disk_check(ide_drive_t *drive, const char *s)
+{
+	return 1;
+}
+
+static void ide_disk_setup(ide_drive_t *drive)
 {
 	struct ide_disk_obj *idkp = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -652,33 +633,13 @@ static void idedisk_setup(ide_drive_t *drive)
 			 drive->queue->max_sectors / 2);
 
 	/* calculate drive capacity, and select LBA if possible */
-	init_idedisk_capacity(drive);
-
-	/* limit drive capacity to 137GB if LBA48 cannot be used */
-	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
-	    drive->capacity64 > 1ULL << 28) {
-		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
-		       "%llu sectors (%llu MB)\n",
-		       drive->name, (unsigned long long)drive->capacity64,
-		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
-	}
-
-	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
-	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		if (drive->capacity64 > 1ULL << 28) {
-			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
-					 " will be used for accessing sectors "
-					 "> %u\n", drive->name, 1 << 28);
-		} else
-			drive->dev_flags &= ~IDE_DFLAG_LBA48;
-	}
+	ide_disk_get_capacity(drive);
 
 	/*
 	 * if possible, give fdisk access to more of the drive,
 	 * by correcting bios_cyls:
 	 */
-	capacity = ide_disk_capacity(drive);
+	capacity = ide_gd_capacity(drive);
 
 	if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
 		if (ata_id_lba48_enabled(drive->id)) {
@@ -718,9 +679,17 @@ static void idedisk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_WCACHE;
 
 	set_wcache(drive, 1);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
+	    (drive->head == 0 || drive->head > 16)) {
+		printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
+			drive->name, drive->head);
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+	} else
+		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void ide_cacheflush_p(ide_drive_t *drive)
+static void ide_disk_flush(ide_drive_t *drive)
 {
 	if (ata_id_flush_enabled(drive->id) == 0 ||
 	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -730,267 +699,40 @@ static void ide_cacheflush_p(ide_drive_t *drive)
 		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-static void ide_disk_remove(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-
-	del_gendisk(g);
-
-	ide_cacheflush_p(drive);
-
-	ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-	struct ide_disk_obj *idkp = to_ide_disk(kref);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-static int ide_disk_probe(ide_drive_t *drive);
-
-/*
- * On HPA drives the capacity needs to be
- * reinitilized on resume otherwise the disk
- * can not be used and a hard reset is required
- */
-static void ide_disk_resume(ide_drive_t *drive)
+static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
 {
-	if (ata_id_hpa_enabled(drive->id))
-		init_idedisk_capacity(drive);
-}
-
-static void ide_device_shutdown(ide_drive_t *drive)
-{
-#ifdef	CONFIG_ALPHA
-	/* On Alpha, halt(8) doesn't actually turn the machine off,
-	   it puts you into the sort of firmware monitor. Typically,
-	   it's used to boot another kernel image, so it's not much
-	   different from reboot(8). Therefore, we don't need to
-	   spin down the disk in this case, especially since Alpha
-	   firmware doesn't handle disks in standby mode properly.
-	   On the other hand, it's reasonably safe to turn the power
-	   off when the shutdown process reaches the firmware prompt,
-	   as the firmware initialization takes rather long time -
-	   at least 10 seconds, which should be sufficient for
-	   the disk to expire its write cache. */
-	if (system_state != SYSTEM_POWER_OFF) {
-#else
-	if (system_state == SYSTEM_RESTART) {
-#endif
-		ide_cacheflush_p(drive);
-		return;
-	}
-
-	printk(KERN_INFO "Shutdown: %s\n", drive->name);
-
-	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+	return 0;
 }
 
-static ide_driver_t idedisk_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-disk",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_disk_probe,
-	.remove			= ide_disk_remove,
-	.resume			= ide_disk_resume,
-	.shutdown		= ide_device_shutdown,
-	.version		= IDEDISK_VERSION,
-	.do_request		= ide_do_rw_disk,
-	.end_request		= ide_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_disk_proc,
-	.settings		= ide_disk_settings,
-#endif
-};
-
-static int idedisk_set_doorlock(ide_drive_t *drive, int on)
+static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
+				 int on)
 {
 	ide_task_t task;
+	int ret;
+
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
+		return 0;
 
 	memset(&task, 0, sizeof(task));
 	task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
 	task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
 
-	return ide_no_data_taskfile(drive, &task);
-}
-
-static int idedisk_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp;
-	ide_drive_t *drive;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	idkp->openers++;
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		check_disk_change(inode->i_bdev);
-		/*
-		 * Ignore the return code from door_lock,
-		 * since the open() has already succeeded,
-		 * and the door_lock is irrelevant at this point.
-		 */
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 1))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	}
-	return 0;
-}
-
-static int idedisk_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
-	ide_drive_t *drive = idkp->drive;
-
-	if (idkp->openers == 1)
-		ide_cacheflush_p(drive);
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 0))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	}
+	ret = ide_no_data_taskfile(drive, &task);
 
-	idkp->openers--;
+	if (ret)
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
-	ide_disk_put(idkp);
-
-	return 0;
-}
-
-static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
+	return ret;
 }
 
-static int idedisk_media_changed(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
-	ide_drive_t *drive = idkp->drive;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-
-	/* if removable, always assume it was changed */
-	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
-}
-
-static int idedisk_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
-	set_capacity(disk, ide_disk_capacity(idkp->drive));
-	return 0;
-}
-
-static struct block_device_operations idedisk_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idedisk_open,
-	.release		= idedisk_release,
-	.ioctl			= ide_disk_ioctl,
-	.getgeo			= idedisk_getgeo,
-	.media_changed		= idedisk_media_changed,
-	.revalidate_disk	= idedisk_revalidate_disk
+const struct ide_disk_ops ide_ata_disk_ops = {
+	.check		= ide_disk_check,
+	.get_capacity	= ide_disk_get_capacity,
+	.setup		= ide_disk_setup,
+	.flush		= ide_disk_flush,
+	.init_media	= ide_disk_init_media,
+	.set_doorlock	= ide_disk_set_doorlock,
+	.do_request	= ide_do_rw_disk,
+	.end_request	= ide_end_request,
+	.ioctl		= ide_disk_ioctl,
 };
-
-MODULE_DESCRIPTION("ATA DISK Driver");
-
-static int ide_disk_probe(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp;
-	struct gendisk *g;
-
-	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-disk", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_disk)
-		goto failed;
-
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp)
-		goto failed;
-
-	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&idkp->kref);
-
-	idkp->drive = drive;
-	idkp->driver = &idedisk_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-
-	idedisk_setup(drive);
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
-	    (drive->head == 0 || drive->head > 16)) {
-		printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
-			drive->name, drive->head);
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-	} else
-		drive->dev_flags |= IDE_DFLAG_ATTACH;
-
-	g->minors = IDE_DISK_MINORS;
-	g->driverfs_dev = &drive->gendev;
-	g->flags |= GENHD_FL_EXT_DEVT;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	set_capacity(g, ide_disk_capacity(drive));
-	g->fops = &idedisk_ops;
-	add_disk(g);
-	return 0;
-
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idedisk_exit(void)
-{
-	driver_unregister(&idedisk_driver.gen_driver);
-}
-
-static int __init idedisk_init(void)
-{
-	return driver_register(&idedisk_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-disk*");
-MODULE_ALIAS("ide-disk");
-module_init(idedisk_init);
-module_exit(idedisk_exit);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index a82fa435566..b234b0feaf7 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -1,19 +1,11 @@
 #ifndef __IDE_DISK_H
 #define __IDE_DISK_H
 
-struct ide_disk_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-};
-
-#define ide_disk_g(disk) \
-	container_of((disk)->private_data, struct ide_disk_obj, driver)
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATA
 /* ide-disk.c */
-sector_t ide_disk_capacity(ide_drive_t *);
+extern const struct ide_disk_ops ide_ata_disk_ops;
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
@@ -21,12 +13,17 @@ ide_decl_devset(wcache);
 ide_decl_devset(acoustic);
 
 /* ide-disk_ioctl.c */
-int ide_disk_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		   unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-disk_proc.c */
 extern ide_proc_entry_t ide_disk_proc[];
 extern const struct ide_proc_devset ide_disk_settings[];
 #endif
+#else
+#define ide_disk_proc		NULL
+#define ide_disk_settings	NULL
+#endif
 
 #endif /* __IDE_DISK_H */
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index a6cf1a03a80..a49698bcf96 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -13,12 +13,10 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
 { 0 }
 };
 
-int ide_disk_ioctl(struct inode *inode, struct file *file,
+int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file,
 		   unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
-	ide_drive_t *drive = idkp->drive;
 	int err;
 
 	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 4724976afe7..1146f4204c6 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -56,7 +56,7 @@ static int proc_idedisk_read_capacity
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_disk_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index 0903782689e..cac431f0df1 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -130,7 +130,7 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 			xcount = bcount & 0xffff;
 			if (is_trm290)
 				xcount = ((xcount >> 2) - 1) << 16;
-			if (xcount == 0x0000) {
+			else if (xcount == 0x0000) {
 				if (count++ >= PRD_ENTRIES)
 					goto use_pio_instead;
 				*table++ = cpu_to_le32(0x8000);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index cf0aa25470e..aeb1ad782f5 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -15,12 +15,6 @@
  * Documentation/ide/ChangeLog.ide-floppy.1996-2002
  */
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
-
-#define IDEFLOPPY_VERSION "1.00"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -49,19 +43,6 @@
 
 #include "ide-floppy.h"
 
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG	0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 /*
  * After each failed packet command we issue a request sense command and retry
  * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
@@ -83,43 +64,13 @@ module_param(debug_mask, ulong, 0644);
 /* Error code returned in rq->errors to the higher part of the driver. */
 #define	IDEFLOPPY_ERROR_GENERAL		101
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
-
-static void idefloppy_cleanup_obj(struct kref *);
-
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = NULL;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	floppy = ide_drv_g(disk, ide_floppy_obj);
-	if (floppy) {
-		if (ide_device_get(floppy->drive))
-			floppy = NULL;
-		else
-			kref_get(&floppy->kref);
-	}
-	mutex_unlock(&idefloppy_ref_mutex);
-	return floppy;
-}
-
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
-{
-	ide_drive_t *drive = floppy->drive;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	kref_put(&floppy->kref, idefloppy_cleanup_obj);
-	ide_device_put(drive);
-	mutex_unlock(&idefloppy_ref_mutex);
-}
-
 /*
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
 	int error;
 
@@ -161,12 +112,12 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
 	struct bio *bio = rq->bio;
 
 	while ((bio = rq->bio) != NULL)
-		idefloppy_end_request(drive, 1, 0);
+		ide_floppy_end_request(drive, 1, 0);
 }
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc *pc = drive->pc;
 	int uptodate = pc->error ? 0 : 1;
 
@@ -200,10 +151,10 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 			       "Aborting request!\n");
 	}
 
-	idefloppy_end_request(drive, uptodate, 0);
+	ide_floppy_end_request(drive, uptodate, 0);
 }
 
-static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
+static void ide_floppy_report_error(struct ide_disk_obj *floppy,
 				    struct ide_atapi_pc *pc)
 {
 	/* supress error messages resulting from Medium not present */
@@ -222,7 +173,7 @@ static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
 static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 		struct ide_atapi_pc *pc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 
 	if (floppy->failed_pc == NULL &&
 	    pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -286,7 +237,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 				    struct ide_atapi_pc *pc, struct request *rq,
 				    unsigned long sector)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	int block = sector / floppy->bs_factor;
 	int blocks = rq->nr_sectors / floppy->bs_factor;
 	int cmd = rq_data_dir(rq);
@@ -310,7 +261,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 	pc->flags |= PC_FLAG_DMA_OK;
 }
 
-static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
+static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 		struct ide_atapi_pc *pc, struct request *rq)
 {
 	ide_init_pc(pc);
@@ -329,13 +280,12 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
-		struct request *rq, sector_t block_s)
+static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
+					     struct request *rq, sector_t block)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
 	struct ide_atapi_pc *pc;
-	unsigned long block = (unsigned long)block_s;
 
 	ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, "
 		      "errors: %d\n",
@@ -353,7 +303,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		else
 			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 	if (blk_fs_request(rq)) {
@@ -361,11 +311,11 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
 				drive->name);
-			idefloppy_end_request(drive, 0, 0);
+			ide_floppy_end_request(drive, 0, 0);
 			return ide_stopped;
 		}
 		pc = &floppy->queued_pc;
-		idefloppy_create_rw_cmd(drive, pc, rq, block);
+		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
 	} else if (blk_special_request(rq)) {
 		pc = (struct ide_atapi_pc *) rq->buffer;
 	} else if (blk_pc_request(rq)) {
@@ -373,7 +323,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		idefloppy_blockpc_cmd(floppy, pc, rq);
 	} else {
 		blk_dump_rq_flags(rq, PFX "unsupported command in queue");
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 
@@ -394,7 +344,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
  */
 static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *page;
@@ -410,11 +360,11 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 	}
 
 	if (pc.buf[3] & 0x80)
-		drive->atapi_flags |= IDE_AFLAG_WP;
+		drive->dev_flags |= IDE_DFLAG_WP;
 	else
-		drive->atapi_flags &= ~IDE_AFLAG_WP;
+		drive->dev_flags &= ~IDE_DFLAG_WP;
 
-	set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP));
+	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
 	page = &pc.buf[8];
 
@@ -445,7 +395,9 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 			drive->name, lba_capacity, capacity);
 		floppy->blocks = floppy->block_size ?
 			capacity / floppy->block_size : 0;
+		drive->capacity64 = floppy->blocks * floppy->bs_factor;
 	}
+
 	return 0;
 }
 
@@ -455,7 +407,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  */
 static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *cap_desc;
@@ -466,7 +418,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	drive->bios_head = drive->bios_sect = 0;
 	floppy->blocks = 0;
 	floppy->bs_factor = 1;
-	set_capacity(floppy->disk, 0);
+	drive->capacity64 = 0;
 
 	ide_floppy_create_read_capacity_cmd(&pc);
 	if (ide_queue_pc_tail(drive, disk, &pc)) {
@@ -523,6 +475,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 					       "non 512 bytes block size not "
 					       "fully supported\n",
 					       drive->name);
+				drive->capacity64 =
+					floppy->blocks * floppy->bs_factor;
 				rc = 0;
 			}
 			break;
@@ -547,21 +501,12 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
 		(void) ide_floppy_get_flexible_disk_page(drive);
 
-	set_capacity(disk, floppy->blocks * floppy->bs_factor);
-
 	return rc;
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	unsigned long capacity = floppy->blocks * floppy->bs_factor;
-
-	return capacity;
-}
-
-static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
+static void ide_floppy_setup(ide_drive_t *drive)
 {
+	struct ide_disk_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
 
 	drive->pc_callback	 = ide_floppy_callback;
@@ -592,252 +537,42 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
 		blk_queue_max_sectors(drive->queue, 64);
 		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
 		/* IOMEGA Clik! drives do not support lock/unlock commands */
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	}
 
 	(void) ide_floppy_get_capacity(drive);
 
 	ide_proc_register_driver(drive, floppy->driver);
-}
 
-static void ide_floppy_remove(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	struct gendisk *g = floppy->disk;
-
-	ide_proc_unregister_driver(drive, floppy->driver);
-
-	del_gendisk(g);
-
-	ide_floppy_put(floppy);
+	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void idefloppy_cleanup_obj(struct kref *kref)
+static void ide_floppy_flush(ide_drive_t *drive)
 {
-	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	struct gendisk *g = floppy->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(floppy);
 }
 
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-floppy",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_floppy_probe,
-	.remove			= ide_floppy_remove,
-	.version		= IDEFLOPPY_VERSION,
-	.do_request		= idefloppy_do_request,
-	.end_request		= idefloppy_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_floppy_proc,
-	.settings		= ide_floppy_settings,
-#endif
-};
-
-static int idefloppy_open(struct inode *inode, struct file *filp)
+static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
 {
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy;
-	ide_drive_t *drive;
 	int ret = 0;
 
-	floppy = ide_floppy_get(disk);
-	if (!floppy)
-		return -ENXIO;
-
-	drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	floppy->openers++;
-
-	if (floppy->openers == 1) {
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		if (ide_do_test_unit_ready(drive, disk))
-			ide_do_start_stop(drive, disk, 1);
-
-		if (ide_floppy_get_capacity(drive)
-		   && (filp->f_flags & O_NDELAY) == 0
-		    /*
-		     * Allow O_NDELAY to open a drive without a disk, or with an
-		     * unreadable disk, so that we can get the format capacity
-		     * of the drive or begin the format - Sam
-		     */
-		    ) {
-			ret = -EIO;
-			goto out_put_floppy;
-		}
-
-		if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
-			ret = -EROFS;
-			goto out_put_floppy;
-		}
-
-		drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
-		ide_set_media_lock(drive, disk, 1);
-		check_disk_change(inode->i_bdev);
-	} else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_floppy;
-	}
-	return 0;
-
-out_put_floppy:
-	floppy->openers--;
-	ide_floppy_put(floppy);
-	return ret;
-}
-
-static int idefloppy_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (floppy->openers == 1) {
-		ide_set_media_lock(drive, disk, 0);
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	floppy->openers--;
-
-	ide_floppy_put(floppy);
-
-	return 0;
-}
-
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	if (ide_do_test_unit_ready(drive, disk))
+		ide_do_start_stop(drive, disk, 1);
 
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
+	ret = ide_floppy_get_capacity(drive);
 
-static int idefloppy_media_changed(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	int ret;
+	set_capacity(disk, ide_gd_capacity(drive));
 
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-	ret = !!(drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED);
-	drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
 	return ret;
 }
 
-static int idefloppy_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_floppy_capacity(floppy->drive));
-	return 0;
-}
-
-static struct block_device_operations idefloppy_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idefloppy_open,
-	.release		= idefloppy_release,
-	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= idefloppy_getgeo,
-	.media_changed		= idefloppy_media_changed,
-	.revalidate_disk	= idefloppy_revalidate_disk
+const struct ide_disk_ops ide_atapi_disk_ops = {
+	.check		= ide_check_atapi_device,
+	.get_capacity	= ide_floppy_get_capacity,
+	.setup		= ide_floppy_setup,
+	.flush		= ide_floppy_flush,
+	.init_media	= ide_floppy_init_media,
+	.set_doorlock	= ide_set_media_lock,
+	.do_request	= ide_floppy_do_request,
+	.end_request	= ide_floppy_end_request,
+	.ioctl		= ide_floppy_ioctl,
 };
-
-static int ide_floppy_probe(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy;
-	struct gendisk *g;
-
-	if (!strstr("ide-floppy", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_floppy)
-		goto failed;
-
-	if (!ide_check_atapi_device(drive, DRV_NAME)) {
-		printk(KERN_ERR PFX "%s: not supported by this version of "
-		       DRV_NAME "\n", drive->name);
-		goto failed;
-	}
-	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-	if (!floppy) {
-		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-		       drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk(1 << PARTN_BITS);
-	if (!g)
-		goto out_free_floppy;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&floppy->kref);
-
-	floppy->drive = drive;
-	floppy->driver = &idefloppy_driver;
-	floppy->disk = g;
-
-	g->private_data = &floppy->driver;
-
-	drive->driver_data = floppy;
-
-	drive->debug_mask = debug_mask;
-
-	idefloppy_setup(drive, floppy);
-	drive->dev_flags |= IDE_DFLAG_ATTACH;
-
-	g->minors = 1 << PARTN_BITS;
-	g->driverfs_dev = &drive->gendev;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idefloppy_ops;
-	add_disk(g);
-	return 0;
-
-out_free_floppy:
-	kfree(floppy);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idefloppy_exit(void)
-{
-	driver_unregister(&idefloppy_driver.gen_driver);
-}
-
-static int __init idefloppy_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&idefloppy_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 17cf865e583..c17124dd607 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,37 +1,9 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
-/*
- * Most of our global data which we need to save even as we leave the driver
- * due to an interrupt or a timer event is stored in a variable of type
- * idefloppy_floppy_t, defined below.
- */
-typedef struct ide_floppy_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-
-	/* Last failed packet command */
-	struct ide_atapi_pc *failed_pc;
-	/* used for blk_{fs,pc}_request() requests */
-	struct ide_atapi_pc queued_pc;
-
-	/* Last error information */
-	u8 sense_key, asc, ascq;
-
-	int progress_indication;
-
-	/* Device information */
-	/* Current format */
-	int blocks, block_size, bs_factor;
-	/* Last format capacity descriptor */
-	u8 cap_desc[8];
-	/* Copy of the flexible disk page */
-	u8 flexible_disk_page[32];
-} idefloppy_floppy_t;
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATAPI
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
  * See SFF-8070i spec.
@@ -46,17 +18,22 @@ typedef struct ide_floppy_obj {
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
 /* ide-floppy.c */
+extern const struct ide_disk_ops ide_atapi_disk_ops;
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-sector_t ide_floppy_capacity(ide_drive_t *);
 
 /* ide-floppy_ioctl.c */
-int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
+int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		     unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-floppy_proc.c */
 extern ide_proc_entry_t ide_floppy_proc[];
 extern const struct ide_proc_devset ide_floppy_settings[];
 #endif
+#else
+#define ide_floppy_proc		NULL
+#define ide_floppy_settings	NULL
+#endif
 
 #endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index a3a7a0809e2..409e4c15f9b 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -33,7 +33,7 @@
 
 static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 {
-	struct ide_floppy_obj *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	u8 header_len, desc_cnt;
 	int i, blocks, length, u_array_size, u_index;
@@ -113,7 +113,7 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
 
 static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 
 	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
@@ -132,17 +132,17 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 
 static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int blocks, length, flags, err = 0;
 
 	if (floppy->openers > 1) {
 		/* Don't format if someone is using the disk */
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		return -EBUSY;
 	}
 
-	drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
+	drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
 
 	/*
 	 * Send ATAPI_FORMAT_UNIT to the drive.
@@ -174,7 +174,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 out:
 	if (err)
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	return err;
 }
 
@@ -190,7 +190,7 @@ out:
 
 static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int progress_indication = 0x10000;
 
@@ -226,7 +226,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
 			       unsigned long arg, unsigned int cmd)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
 
@@ -260,13 +260,10 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
 	}
 }
 
-int ide_floppy_ioctl(struct inode *inode, struct file *file,
-		    unsigned int cmd, unsigned long arg)
+int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode,
+		     struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
 	struct ide_atapi_pc pc;
 	void __user *argp = (void __user *)arg;
 	int err;
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 76f0c6c4eca..3ec762cb60a 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -9,7 +9,7 @@ static int proc_idefloppy_read_capacity(char *page, char **start, off_t off,
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_floppy_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
 
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
new file mode 100644
index 00000000000..d44898f46c3
--- /dev/null
+++ b/drivers/ide/ide-gd.c
@@ -0,0 +1,398 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS		(1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS		0
+#endif
+
+#include "ide-disk.h"
+#include "ide-floppy.h"
+
+#define IDE_GD_VERSION	"1.18"
+
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
+static DEFINE_MUTEX(ide_disk_ref_mutex);
+
+static void ide_disk_release(struct kref *);
+
+static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = NULL;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	idkp = ide_drv_g(disk, ide_disk_obj);
+	if (idkp) {
+		if (ide_device_get(idkp->drive))
+			idkp = NULL;
+		else
+			kref_get(&idkp->kref);
+	}
+	mutex_unlock(&ide_disk_ref_mutex);
+	return idkp;
+}
+
+static void ide_disk_put(struct ide_disk_obj *idkp)
+{
+	ide_drive_t *drive = idkp->drive;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	kref_put(&idkp->kref, ide_disk_release);
+	ide_device_put(drive);
+	mutex_unlock(&ide_disk_ref_mutex);
+}
+
+sector_t ide_gd_capacity(ide_drive_t *drive)
+{
+	return drive->capacity64;
+}
+
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
+{
+	struct ide_disk_obj *idkp = drive->driver_data;
+	struct gendisk *g = idkp->disk;
+
+	ide_proc_unregister_driver(drive, idkp->driver);
+
+	del_gendisk(g);
+
+	drive->disk_ops->flush(drive);
+
+	ide_disk_put(idkp);
+}
+
+static void ide_disk_release(struct kref *kref)
+{
+	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	struct gendisk *g = idkp->disk;
+
+	drive->disk_ops = NULL;
+	drive->driver_data = NULL;
+	g->private_data = NULL;
+	put_disk(g);
+	kfree(idkp);
+}
+
+/*
+ * On HPA drives the capacity needs to be
+ * reinitialized on resume; otherwise the disk
+ * cannot be used and a hard reset is required.
+ */
+static void ide_gd_resume(ide_drive_t *drive)
+{
+	if (ata_id_hpa_enabled(drive->id))
+		(void)drive->disk_ops->get_capacity(drive);
+}
+
+static void ide_gd_shutdown(ide_drive_t *drive)
+{
+#ifdef	CONFIG_ALPHA
+	/* On Alpha, halt(8) doesn't actually turn the machine off,
+	   it puts you into the sort of firmware monitor. Typically,
+	   it's used to boot another kernel image, so it's not much
+	   different from reboot(8). Therefore, we don't need to
+	   spin down the disk in this case, especially since Alpha
+	   firmware doesn't handle disks in standby mode properly.
+	   On the other hand, it's reasonably safe to turn the power
+	   off when the shutdown process reaches the firmware prompt,
+	   as the firmware initialization takes rather long time -
+	   at least 10 seconds, which should be sufficient for
+	   the disk to expire its write cache. */
+	if (system_state != SYSTEM_POWER_OFF) {
+#else
+	if (system_state == SYSTEM_RESTART) {
+#endif
+		drive->disk_ops->flush(drive);
+		return;
+	}
+
+	printk(KERN_INFO "Shutdown: %s\n", drive->name);
+
+	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+}
+
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
+{
+	return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
+}
+
+static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
+{
+	return (drive->media == ide_disk) ? ide_disk_settings
+					  : ide_floppy_settings;
+}
+#endif
+
+static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
+					 struct request *rq, sector_t sector)
+{
+	return drive->disk_ops->do_request(drive, rq, sector);
+}
+
+static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
+{
+	return drive->disk_ops->end_request(drive, uptodate, nrsecs);
+}
+
+static ide_driver_t ide_gd_driver = {
+	.gen_driver = {
+		.owner		= THIS_MODULE,
+		.name		= "ide-gd",
+		.bus		= &ide_bus_type,
+	},
+	.probe			= ide_gd_probe,
+	.remove			= ide_gd_remove,
+	.resume			= ide_gd_resume,
+	.shutdown		= ide_gd_shutdown,
+	.version		= IDE_GD_VERSION,
+	.do_request		= ide_gd_do_request,
+	.end_request		= ide_gd_end_request,
+	.error			= __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+	.proc_entries		= ide_disk_proc_entries,
+	.proc_devsets		= ide_disk_proc_devsets,
+#endif
+};
+
+static int ide_gd_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp;
+	ide_drive_t *drive;
+	int ret = 0;
+
+	idkp = ide_disk_get(disk);
+	if (idkp == NULL)
+		return -ENXIO;
+
+	drive = idkp->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	idkp->openers++;
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+		/* Just in case */
+
+		ret = drive->disk_ops->init_media(drive, disk);
+
+		/*
+		 * Allow O_NDELAY to open a drive without a disk, or with an
+		 * unreadable disk, so that we can get the format capacity
+		 * of the drive or begin the format - Sam
+		 */
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
+			ret = -EIO;
+			goto out_put_idkp;
+		}
+
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+			ret = -EROFS;
+			goto out_put_idkp;
+		}
+
+		/*
+		 * Ignore the return code from door_lock,
+		 * since the open() has already succeeded,
+		 * and the door_lock is irrelevant at this point.
+		 */
+		drive->disk_ops->set_doorlock(drive, disk, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+		check_disk_change(inode->i_bdev);
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+		ret = -EBUSY;
+		goto out_put_idkp;
+	}
+	return 0;
+
+out_put_idkp:
+	idkp->openers--;
+	ide_disk_put(idkp);
+	return ret;
+}
+
+static int ide_gd_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	if (idkp->openers == 1)
+		drive->disk_ops->flush(drive);
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->disk_ops->set_doorlock(drive, disk, 0);
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+	}
+
+	idkp->openers--;
+
+	ide_disk_put(idkp);
+
+	return 0;
+}
+
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
+static int ide_gd_media_changed(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	int ret;
+
+	/* do not scan partitions twice if this is a removable device */
+	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+		return 0;
+	}
+
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+	return ret;
+}
+
+static int ide_gd_revalidate_disk(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	set_capacity(disk, ide_gd_capacity(idkp->drive));
+	return 0;
+}
+
+static int ide_gd_ioctl(struct inode *inode, struct file *file,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	return drive->disk_ops->ioctl(drive, inode, file, cmd, arg);
+}
+
+static struct block_device_operations ide_gd_ops = {
+	.owner			= THIS_MODULE,
+	.open			= ide_gd_open,
+	.release		= ide_gd_release,
+	.ioctl			= ide_gd_ioctl,
+	.getgeo			= ide_gd_getgeo,
+	.media_changed		= ide_gd_media_changed,
+	.revalidate_disk	= ide_gd_revalidate_disk
+};
+
+static int ide_gd_probe(ide_drive_t *drive)
+{
+	const struct ide_disk_ops *disk_ops = NULL;
+	struct ide_disk_obj *idkp;
+	struct gendisk *g;
+
+	/* an empty driver_req also matches: strstr("foo", "") is non-NULL */
+	if (!strstr("ide-gd", drive->driver_req))
+		goto failed;
+	/* pick the media-specific ops; stays NULL if that support is not built */
+#ifdef CONFIG_IDE_GD_ATA
+	if (drive->media == ide_disk)
+		disk_ops = &ide_ata_disk_ops;
+#endif
+#ifdef CONFIG_IDE_GD_ATAPI
+	if (drive->media == ide_floppy)
+		disk_ops = &ide_atapi_disk_ops;
+#endif
+	if (disk_ops == NULL)
+		goto failed;
+
+	if (disk_ops->check(drive, DRV_NAME) == 0) {
+		printk(KERN_ERR PFX "%s: not supported by this driver\n",
+			drive->name);
+		goto failed;
+	}
+
+	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+	if (!idkp) {
+		printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
+			drive->name);
+		goto failed;
+	}
+
+	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
+	if (!g)
+		goto out_free_idkp;
+
+	ide_init_disk(g, drive);
+
+	kref_init(&idkp->kref);
+
+	idkp->drive = drive;
+	idkp->driver = &ide_gd_driver;
+	idkp->disk = g;
+
+	g->private_data = &idkp->driver;
+
+	drive->driver_data = idkp;
+	drive->debug_mask = debug_mask;
+	drive->disk_ops = disk_ops;
+
+	disk_ops->setup(drive);
+
+	set_capacity(g, ide_gd_capacity(drive));
+	/* OR the flags in: a plain "=" below would discard GENHD_FL_EXT_DEVT */
+	g->minors = IDE_DISK_MINORS;
+	g->driverfs_dev = &drive->gendev;
+	g->flags |= GENHD_FL_EXT_DEVT;
+	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+		g->flags |= GENHD_FL_REMOVABLE;
+	g->fops = &ide_gd_ops;
+	add_disk(g);
+	return 0;
+
+out_free_idkp:
+	kfree(idkp);
+failed:
+	return -ENODEV;
+}
+
+static int __init ide_gd_init(void)
+{
+	printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
+	return driver_register(&ide_gd_driver.gen_driver);
+}
+
+static void __exit ide_gd_exit(void)
+{
+	driver_unregister(&ide_gd_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-disk*");
+MODULE_ALIAS("ide-disk");
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
new file mode 100644
index 00000000000..7d3d101713e
--- /dev/null
+++ b/drivers/ide/ide-gd.h
@@ -0,0 +1,44 @@
+#ifndef __IDE_GD_H
+#define __IDE_GD_H
+
+#define DRV_NAME "ide-gd"
+#define PFX DRV_NAME ": "
+
+/* set to 1 to compile in ide_debug_log() output */
+#define IDE_GD_DEBUG_LOG	0
+/* "## args" drops the trailing comma when no format arguments are passed */
+#if IDE_GD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, ## args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
+struct ide_disk_obj {
+	ide_drive_t	*drive;
+	ide_driver_t	*driver;
+	struct gendisk	*disk;
+	struct kref	kref;
+	unsigned int	openers;	/* protected by BKL for now */
+
+	/* Last failed packet command */
+	struct ide_atapi_pc *failed_pc;
+	/* used for blk_{fs,pc}_request() requests */
+	struct ide_atapi_pc queued_pc;
+
+	/* Last error information */
+	u8 sense_key, asc, ascq;
+
+	int progress_indication;
+
+	/* Device information */
+	/* Current format */
+	int blocks, block_size, bs_factor;
+	/* Last format capacity descriptor */
+	u8 cap_desc[8];
+	/* Copy of the flexible disk page */
+	u8 flexible_disk_page[32];
+};
+
+sector_t ide_gd_capacity(ide_drive_t *);
+
+#endif /* __IDE_GD_H */
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index b762deb2dac..bb7a1ed8094 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -755,7 +755,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 	 
 	udelay(1);
 	SELECT_DRIVE(drive);
-	SELECT_MASK(drive, 0);
+	SELECT_MASK(drive, 1);
 	udelay(1);
 	tp_ops->set_irq(hwif, 0);
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 19f8c7770a2..1649ea54f76 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -208,6 +208,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 		drive->ready_stat = 0;
 		if (ata_id_cdb_intr(id))
 			drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+		drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
 		/* we don't do head unloading on ATAPI devices */
 		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
 		return;
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index b26926487cc..c31d0dd7a53 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -567,10 +567,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
 void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	mutex_lock(&ide_setting_mtx);
-	drive->settings = driver->settings;
+	drive->settings = driver->proc_devsets(drive);
 	mutex_unlock(&ide_setting_mtx);
 
-	ide_add_proc_entries(drive->proc, driver->proc, drive);
+	ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
 }
 
 EXPORT_SYMBOL(ide_proc_register_driver);
@@ -591,7 +591,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	unsigned long flags;
 
-	ide_remove_proc_entries(drive->proc, driver->proc);
+	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
 	mutex_lock(&ide_setting_mtx);
 	spin_lock_irqsave(&ide_lock, flags);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d879c7797cd..b2b2e5e8d38 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2108,7 +2108,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 
 	/* device lacks locking support according to capabilities page */
 	if ((caps[6] & 1) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
 	if (caps[7] & 0x02)
 		tape->blk_size = 512;
@@ -2298,6 +2298,16 @@ static ide_proc_entry_t idetape_proc[] = {
 	{ "name",	S_IFREG|S_IRUGO,	proc_idetape_read_name,	NULL },
 	{ NULL, 0, NULL, NULL }
 };
+
+static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
+{
+	return idetape_proc;
+}
+
+static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
+{
+	return idetape_settings;
+}
 #endif
 
 static int ide_tape_probe(ide_drive_t *);
@@ -2315,8 +2325,8 @@ static ide_driver_t idetape_driver = {
 	.end_request		= idetape_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idetape_proc,
-	.settings		= idetape_settings,
+	.proc_entries		= ide_tape_proc_entries,
+	.proc_devsets		= ide_tape_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/pci/Makefile b/drivers/ide/pci/Makefile
index 02e6ee7d751..ab44a1f5f5a 100644
--- a/drivers/ide/pci/Makefile
+++ b/drivers/ide/pci/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_BLK_DEV_CS5535)		+= cs5535.o
 obj-$(CONFIG_BLK_DEV_SC1200)		+= sc1200.o
 obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
-obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 8689a706f53..8f1b2d9f051 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -46,10 +46,27 @@ static const struct ide_port_ops delkin_cb_port_ops = {
 	.quirkproc		= ide_undecoded_slave,
 };
 
+static unsigned int delkin_cb_init_chipset(struct pci_dev *dev)
+{
+	unsigned long base = pci_resource_start(dev, 0);
+	int i;
+
+	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
+	inb(base + 0x17);		/* read status to clear interrupts */
+
+	for (i = 0; i < sizeof(setup); ++i) {
+		if (setup[i])
+			outb(setup[i], base + i);
+	}
+
+	return 0;
+}
+
 static const struct ide_port_info delkin_cb_port_info = {
 	.port_ops		= &delkin_cb_port_ops,
 	.host_flags		= IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
 				  IDE_HFLAG_NO_DMA,
+	.init_chipset		= delkin_cb_init_chipset,
 };
 
 static int __devinit
@@ -57,7 +74,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct ide_host *host;
 	unsigned long base;
-	int i, rc;
+	int rc;
 	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	rc = pci_enable_device(dev);
@@ -72,12 +89,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 		return rc;
 	}
 	base = pci_resource_start(dev, 0);
-	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
-	inb(base + 0x17);		/* read status to clear interrupts */
-	for (i = 0; i < sizeof(setup); ++i) {
-		if (setup[i])
-			outb(setup[i], base + i);
-	}
+
+	delkin_cb_init_chipset(dev);
 
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
@@ -110,6 +123,40 @@ delkin_cb_remove (struct pci_dev *dev)
 	pci_disable_device(dev);
 }
 
+#ifdef CONFIG_PM
+static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	pci_save_state(dev);
+	pci_disable_device(dev);
+	pci_set_power_state(dev, pci_choose_state(dev, state));
+
+	return 0;
+}
+
+static int delkin_cb_resume(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	int rc;
+
+	pci_set_power_state(dev, PCI_D0);
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_restore_state(dev);
+	pci_set_master(dev);
+
+	if (host->init_chipset)
+		host->init_chipset(dev);
+
+	return 0;
+}
+#else
+#define delkin_cb_suspend NULL
+#define delkin_cb_resume NULL
+#endif
+
 static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = {
 	{ 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{ 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
@@ -122,6 +169,8 @@ static struct pci_driver delkin_cb_pci_driver = {
 	.id_table	= delkin_cb_pci_tbl,
 	.probe		= delkin_cb_probe,
 	.remove		= delkin_cb_remove,
+	.suspend	= delkin_cb_suspend,
+	.resume		= delkin_cb_resume,
 };
 
 static int __init delkin_cb_init(void)
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
deleted file mode 100644
index fb1a3aa57f0..00000000000
--- a/drivers/ide/pci/hpt34x.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 1998-2000	Andre Hedrick <andre@linux-ide.org>
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- *
- * 00:12.0 Unknown mass storage controller:
- * Triones Technologies, Inc.
- * Unknown device 0003 (rev 01)
- *
- * hde: UDMA 2 (0x0000 0x0002) (0x0000 0x0010)
- * hdf: UDMA 2 (0x0002 0x0012) (0x0010 0x0030)
- * hde: DMA 2  (0x0000 0x0002) (0x0000 0x0010)
- * hdf: DMA 2  (0x0002 0x0012) (0x0010 0x0030)
- * hdg: DMA 1  (0x0012 0x0052) (0x0030 0x0070)
- * hdh: DMA 1  (0x0052 0x0252) (0x0070 0x00f0)
- *
- * ide-pci.c reference
- *
- * Since there are two cards that report almost identically,
- * the only discernable difference is the values reported in pcicmd.
- * Booting-BIOS card or HPT363 :: pcicmd == 0x07
- * Non-bootable card or HPT343 :: pcicmd == 0x05
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "hpt34x"
-
-#define HPT343_DEBUG_DRIVE_INFO		0
-
-static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 reg1= 0, tmp1 = 0, reg2 = 0, tmp2 = 0;
-	u8			hi_speed, lo_speed;
-
-	hi_speed = speed >> 4;
-	lo_speed = speed & 0x0f;
-
-	if (hi_speed & 7) {
-		hi_speed = (hi_speed & 4) ? 0x01 : 0x10;
-	} else {
-		lo_speed <<= 5;
-		lo_speed >>= 5;
-	}
-
-	pci_read_config_dword(dev, 0x44, &reg1);
-	pci_read_config_dword(dev, 0x48, &reg2);
-	tmp1 = ((lo_speed << (3*drive->dn)) | (reg1 & ~(7 << (3*drive->dn))));
-	tmp2 = ((hi_speed << drive->dn) | (reg2 & ~(0x11 << drive->dn)));
-	pci_write_config_dword(dev, 0x44, tmp1);
-	pci_write_config_dword(dev, 0x48, tmp2);
-
-#if HPT343_DEBUG_DRIVE_INFO
-	printk("%s: %s drive%d (0x%04x 0x%04x) (0x%04x 0x%04x)" \
-		" (0x%02x 0x%02x)\n",
-		drive->name, ide_xfer_verbose(speed),
-		drive->dn, reg1, tmp1, reg2, tmp2,
-		hi_speed, lo_speed);
-#endif /* HPT343_DEBUG_DRIVE_INFO */
-}
-
-static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
-{
-	hpt34x_set_mode(drive, XFER_PIO_0 + pio);
-}
-
-/*
- * If the BIOS does not set the IO base addaress to XX00, 343 will fail.
- */
-#define	HPT34X_PCI_INIT_REG		0x80
-
-static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
-{
-	int i = 0;
-	unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
-	unsigned long hpt_addr[4] = { 0x20, 0x34, 0x28, 0x3c };
-	unsigned long hpt_addr_len[4] = { 7, 3, 7, 3 };
-	u16 cmd;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	pci_write_config_byte(dev, HPT34X_PCI_INIT_REG, 0x00);
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
-	if (cmd & PCI_COMMAND_MEMORY)
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0);
-	else
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
-
-	/*
-	 * Since 20-23 can be assigned and are R/W, we correct them.
-	 */
-	pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
-	for(i=0; i<4; i++) {
-		dev->resource[i].start = (hpt34xIoBase + hpt_addr[i]);
-		dev->resource[i].end = dev->resource[i].start + hpt_addr_len[i];
-		dev->resource[i].flags = IORESOURCE_IO;
-		pci_write_config_dword(dev,
-				(PCI_BASE_ADDRESS_0 + (i * 4)),
-				dev->resource[i].start);
-	}
-	pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-	local_irq_restore(flags);
-
-	return dev->irq;
-}
-
-static const struct ide_port_ops hpt34x_port_ops = {
-	.set_pio_mode		= hpt34x_set_pio_mode,
-	.set_dma_mode		= hpt34x_set_mode,
-};
-
-#define IDE_HFLAGS_HPT34X \
-	(IDE_HFLAG_NO_ATAPI_DMA | \
-	 IDE_HFLAG_NO_DSC | \
-	 IDE_HFLAG_NO_AUTODMA)
-
-static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
-	{ /* 0: HPT343 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-	},
-	{ /* 1: HPT345 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO5,
-#ifdef CONFIG_HPT34X_AUTODMA
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-#endif
-	}
-};
-
-static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d;
-	u16 pcicmd = 0;
-
-	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-
-	d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
-
-	return ide_pci_init_one(dev, d, NULL);
-}
-
-static const struct pci_device_id hpt34x_pci_tbl[] = {
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT343), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, hpt34x_pci_tbl);
-
-static struct pci_driver hpt34x_pci_driver = {
-	.name		= "HPT34x_IDE",
-	.id_table	= hpt34x_pci_tbl,
-	.probe		= hpt34x_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init hpt34x_ide_init(void)
-{
-	return ide_pci_register_driver(&hpt34x_pci_driver);
-}
-
-static void __exit hpt34x_ide_exit(void)
-{
-	pci_unregister_driver(&hpt34x_pci_driver);
-}
-
-module_init(hpt34x_ide_init);
-module_exit(hpt34x_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 9cf171cb937..a7909e9c720 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -3,7 +3,7 @@
  * Portions Copyright (C) 2001	        Sun Microsystems, Inc.
  * Portions Copyright (C) 2003		Red Hat Inc
  * Portions Copyright (C) 2007		Bartlomiej Zolnierkiewicz
- * Portions Copyright (C) 2005-2007	MontaVista Software, Inc.
+ * Portions Copyright (C) 2005-2008	MontaVista Software, Inc.
  *
  * Thanks to HighPoint Technologies for their assistance, and hardware.
  * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
@@ -748,26 +748,24 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list) {
-		if (info->chip_type >= HPT370) {
-			u8 scr1 = 0;
-
-			pci_read_config_byte(dev, 0x5a, &scr1);
-			if (((scr1 & 0x10) >> 4) != mask) {
-				if (mask)
-					scr1 |=  0x10;
-				else
-					scr1 &= ~0x10;
-				pci_write_config_byte(dev, 0x5a, scr1);
-			}
-		} else {
+	if (drive->quirk_list == 0)
+		return;
+
+	if (info->chip_type >= HPT370) {
+		u8 scr1 = 0;
+
+		pci_read_config_byte(dev, 0x5a, &scr1);
+		if (((scr1 & 0x10) >> 4) != mask) {
 			if (mask)
-				disable_irq(hwif->irq);
+				scr1 |=  0x10;
 			else
-				enable_irq (hwif->irq);
+				scr1 &= ~0x10;
+			pci_write_config_byte(dev, 0x5a, scr1);
 		}
-	} else
-		outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
+	} else if (mask)
+		disable_irq(hwif->irq);
+	else
+		enable_irq(hwif->irq);
 }
 
 /*
@@ -1289,7 +1287,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 9ce1d805992..49f163aa51e 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -617,7 +617,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	unsigned long intmask_port;
 	unsigned long mode_port;
 	unsigned long ecmode_port;
-	unsigned long dma_status_port;
 	u32 reg = 0;
 	struct scc_ports *ports;
 	int rc;
@@ -637,7 +636,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	intmask_port = dma_base + 0x010;
 	mode_port = ctl_base + 0x024;
 	ecmode_port = ctl_base + 0xf00;
-	dma_status_port = dma_base + 0x004;
 
 	/* controller initialization */
 	reg = 0;
@@ -843,8 +841,6 @@ static u8 scc_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_scc(ide_hwif_t *hwif)
 {
-	struct scc_ports *ports = ide_get_hwifdata(hwif);
-
 	/* PTERADD */
 	out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
 
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index dd634541ce3..8af9b23499f 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -101,18 +101,8 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = reg + i * 4;
 
-	if (ctrl_port)
-		hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq_port)
-		hw->io_ports.irq_addr = irq_port;
-}
-
-static void
-sgiioc4_maskproc(ide_drive_t * drive, int mask)
-{
-	writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
-	       (void __iomem *)drive->hwif->io_ports.ctl_addr);
+	hw->io_ports.ctl_addr = ctrl_port;
+	hw->io_ports.irq_addr = irq_port;
 }
 
 static int
@@ -310,16 +300,14 @@ static u8 sgiioc4_read_status(ide_hwif_t *hwif)
 	unsigned long port = hwif->io_ports.status_addr;
 	u8 reg = (u8) readb((void __iomem *) port);
 
-	if ((port & 0xFFF) == 0x11C) {	/* Status register of IOC4 */
-		if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */
-			unsigned long other_ir = port - 0x110;
-			unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
+	if (!(reg & ATA_BUSY)) {	/* Not busy... check for interrupt */
+		unsigned long other_ir = port - 0x110;
+		unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
 
-			/* Clear the Interrupt, Error bits on the IOC4 */
-			if (intr_reg & 0x03) {
-				writel(0x03, (void __iomem *) other_ir);
-				intr_reg = (u32) readl((void __iomem *) other_ir);
-			}
+		/* Clear the Interrupt, Error bits on the IOC4 */
+		if (intr_reg & 0x03) {
+			writel(0x03, (void __iomem *) other_ir);
+			intr_reg = (u32) readl((void __iomem *) other_ir);
 		}
 	}
 
@@ -332,13 +320,9 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET;
-	void __iomem *virt_dma_base;
 	int num_ports = sizeof (ioc4_dma_regs_t);
 	void *pad;
 
-	if (dma_base == 0)
-		return -1;
-
 	printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
 
 	if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) {
@@ -348,14 +332,8 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 		return -1;
 	}
 
-	virt_dma_base = ioremap(dma_base, num_ports);
-	if (virt_dma_base == NULL) {
-		printk(KERN_ERR "%s(%s) -- ERROR: unable to map addresses "
-		       "0x%lx to 0x%lx\n", __func__, hwif->name,
-		       dma_base, dma_base + num_ports - 1);
-		goto dma_remap_failure;
-	}
-	hwif->dma_base = (unsigned long) virt_dma_base;
+	hwif->dma_base = (unsigned long)hwif->io_ports.irq_addr +
+			 IOC4_DMA_OFFSET;
 
 	hwif->sg_max_nents = IOC4_PRD_ENTRIES;
 
@@ -379,9 +357,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 	printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name);
 
 dma_pci_alloc_failure:
-	iounmap(virt_dma_base);
-
-dma_remap_failure:
 	release_mem_region(dma_base, num_ports);
 
 	return -1;
@@ -563,8 +538,6 @@ static const struct ide_port_ops sgiioc4_port_ops = {
 	.set_dma_mode		= sgiioc4_set_dma_mode,
 	/* reset DMA engine, clear IRQs */
 	.resetproc		= sgiioc4_resetproc,
-	/* mask on/off NIEN register */
-	.maskproc		= sgiioc4_maskproc,
 };
 
 static const struct ide_dma_ops sgiioc4_dma_ops = {
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a78d35aecee..f1e82a92e61 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -122,7 +122,7 @@ struct cm_counter_attribute {
 
 #define CM_COUNTER_ATTR(_name, _index) \
 struct cm_counter_attribute cm_##_name##_counter_attr = { \
-	.attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
+	.attr = { .name = __stringify(_name), .mode = 0444 }, \
 	.index = _index \
 }
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 49c45feccd5..5c54fc2350b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -406,19 +406,15 @@ static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
 
 	if (i == qp_info->snoop_table_size) {
 		/* Grow table. */
-		new_snoop_table = kmalloc(sizeof mad_snoop_priv *
-					  qp_info->snoop_table_size + 1,
-					  GFP_ATOMIC);
+		new_snoop_table = krealloc(qp_info->snoop_table,
+					   sizeof mad_snoop_priv *
+					   (qp_info->snoop_table_size + 1),
+					   GFP_ATOMIC);
 		if (!new_snoop_table) {
 			i = -ENOMEM;
 			goto out;
 		}
-		if (qp_info->snoop_table) {
-			memcpy(new_snoop_table, qp_info->snoop_table,
-			       sizeof mad_snoop_priv *
-			       qp_info->snoop_table_size);
-			kfree(qp_info->snoop_table);
-		}
+
 		qp_info->snoop_table = new_snoop_table;
 		qp_info->snoop_table_size++;
 	}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 3ddacf39b7b..4346a24568f 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -904,8 +904,8 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
 
 	mutex_lock(&file->mut);
 	mc = ucma_alloc_multicast(ctx);
-	if (IS_ERR(mc)) {
-		ret = PTR_ERR(mc);
+	if (!mc) {
+		ret = -ENOMEM;
 		goto err1;
 	}
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index c325c44807e..44e936e48a3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1942,6 +1942,7 @@ fail4:
 fail3:
 	cxgb3_free_atid(ep->com.tdev, ep->atid);
 fail2:
+	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
 out:
 	return err;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 5d7b7855afb..4df887af66a 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -128,6 +128,8 @@ struct ehca_shca {
 	/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
 	u32 hca_cap_mr_pgsize;
 	int max_mtu;
+	int max_num_qps;
+	int max_num_cqs;
 	atomic_t num_cqs;
 	atomic_t num_qps;
 };
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 33647a95eb9..2f4c28a3027 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -132,9 +132,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
 		return ERR_PTR(-EINVAL);
 
-	if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+	if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
 		ehca_err(device, "Unable to create CQ, max number of %i "
-			"CQs reached.", ehca_max_cq);
+			"CQs reached.", shca->max_num_cqs);
 		ehca_err(device, "To increase the maximum number of CQs "
 			"use the number_of_cqs module parameter.\n");
 		return ERR_PTR(-ENOSPC);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 598844d2edc..bb02a86aa52 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -44,6 +44,8 @@
 #include <linux/slab.h>
 #endif
 
+#include <linux/notifier.h>
+#include <linux/memory.h>
 #include "ehca_classes.h"
 #include "ehca_iverbs.h"
 #include "ehca_mrmw.h"
@@ -366,22 +368,24 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
 			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
 
 	/* Set maximum number of CQs and QPs to calculate EQ size */
-	if (ehca_max_qp == -1)
-		ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
-	else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
-		ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
-			"specified by HW", rblock->max_qp);
-		ret = -EINVAL;
-		goto sense_attributes1;
+	if (shca->max_num_qps == -1)
+		shca->max_num_qps = min_t(int, rblock->max_qp,
+					  EHCA_MAX_NUM_QUEUES);
+	else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+		ehca_gen_warn("The requested number of QPs is out of range "
+			      "(1 - %i) specified by HW. Value is set to %i",
+			      rblock->max_qp, rblock->max_qp);
+		shca->max_num_qps = rblock->max_qp;
 	}
 
-	if (ehca_max_cq == -1)
-		ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
-	else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
-		ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
-			"specified by HW", rblock->max_cq);
-		ret = -EINVAL;
-		goto sense_attributes1;
+	if (shca->max_num_cqs == -1)
+		shca->max_num_cqs = min_t(int, rblock->max_cq,
+					  EHCA_MAX_NUM_QUEUES);
+	else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+		ehca_gen_warn("The requested number of CQs is out of range "
+			      "(1 - %i) specified by HW. Value is set to %i",
+			      rblock->max_cq, rblock->max_cq);
+		shca->max_num_cqs = rblock->max_cq;
 	}
 
 	/* query max MTU from first port -- it's the same for all ports */
@@ -733,9 +737,13 @@ static int __devinit ehca_probe(struct of_device *dev,
 		ehca_gen_err("Cannot allocate shca memory.");
 		return -ENOMEM;
 	}
+
 	mutex_init(&shca->modify_mutex);
 	atomic_set(&shca->num_cqs, 0);
 	atomic_set(&shca->num_qps, 0);
+	shca->max_num_qps = ehca_max_qp;
+	shca->max_num_cqs = ehca_max_cq;
+
 	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
 		spin_lock_init(&shca->sport[i].mod_sqp_lock);
 
@@ -755,7 +763,7 @@ static int __devinit ehca_probe(struct of_device *dev,
 		goto probe1;
 	}
 
-	eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
+	eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
 	/* create event queues */
 	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
 	if (ret) {
@@ -964,6 +972,45 @@ void ehca_poll_eqs(unsigned long data)
 	spin_unlock(&shca_list_lock);
 }
 
+/*
+ * Memory hotplug notifier: refuse a pending online/offline transition
+ * (NOTIFY_BAD) while shca_list is non-empty; all other events pass.
+ */
+static int ehca_mem_notifier(struct notifier_block *nb,
+			     unsigned long action, void *data)
+{
+	static unsigned long ehca_dmem_warn_time;
+
+	switch (action) {
+	case MEM_CANCEL_OFFLINE:
+	case MEM_CANCEL_ONLINE:
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		return NOTIFY_OK;
+	case MEM_GOING_ONLINE:
+	case MEM_GOING_OFFLINE:
+		/* only ok if no hca is attached to the lpar */
+		spin_lock(&shca_list_lock);
+		if (list_empty(&shca_list)) {
+			spin_unlock(&shca_list_lock);
+			return NOTIFY_OK;
+		} else {
+			spin_unlock(&shca_list_lock);
+			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
+						   30 * 1000))
+				ehca_gen_err("DMEM operations are not allowed "
+					     "as long as an ehca adapter is "
+					     "attached to the LPAR");
+			return NOTIFY_BAD;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ehca_mem_nb = {
+	.notifier_call = ehca_mem_notifier,
+};
+
 static int __init ehca_module_init(void)
 {
 	int ret;
@@ -991,6 +1038,12 @@ static int __init ehca_module_init(void)
 		goto module_init2;
 	}
 
+	ret = register_memory_notifier(&ehca_mem_nb);
+	if (ret) {
+		ehca_gen_err("Failed registering memory add/remove notifier");
+		goto module_init3;
+	}
+
 	if (ehca_poll_all_eqs != 1) {
 		ehca_gen_err("WARNING!!!");
 		ehca_gen_err("It is possible to lose interrupts.");
@@ -1003,6 +1056,9 @@ static int __init ehca_module_init(void)
 
 	return 0;
 
+module_init3:
+	ibmebus_unregister_driver(&ehca_driver);
+
 module_init2:
 	ehca_destroy_slab_caches();
 
@@ -1018,6 +1074,8 @@ static void __exit ehca_module_exit(void)
 
 	ibmebus_unregister_driver(&ehca_driver);
 
+	unregister_memory_notifier(&ehca_mem_nb);
+
 	ehca_destroy_slab_caches();
 
 	ehca_destroy_comp_pool();
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 4dbe2870e01..4d54b9f6456 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -465,9 +465,9 @@ static struct ehca_qp *internal_create_qp(
 	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
 	unsigned long flags;
 
-	if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+	if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
 		ehca_err(pd->device, "Unable to create QP, max number of %i "
-			 "QPs reached.", ehca_max_qp);
+			 "QPs reached.", shca->max_num_qps);
 		ehca_err(pd->device, "To increase the maximum number of QPs "
 			 "use the number_of_qps module parameter.\n");
 		return ERR_PTR(-ENOSPC);
@@ -502,6 +502,12 @@ static struct ehca_qp *internal_create_qp(
 	if (init_attr->srq) {
 		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
 
+		if (qp_type == IB_QPT_UC) {
+			ehca_err(pd->device, "UC with SRQ not supported");
+			atomic_dec(&shca->num_qps);
+			return ERR_PTR(-EINVAL);
+		}
+
 		has_srq = 1;
 		parms.ext_type = EQPT_SRQBASE;
 		parms.srq_qpn = my_srq->real_qp_num;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index cdca3a511e1..606f1e2ef28 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -298,7 +298,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 	int p, q;
 	int ret;
 
-	for (p = 0; p < dev->dev->caps.num_ports; ++p)
+	for (p = 0; p < dev->num_ports; ++p)
 		for (q = 0; q <= 1; ++q) {
 			agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
@@ -314,7 +314,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 	return 0;
 
 err:
-	for (p = 0; p < dev->dev->caps.num_ports; ++p)
+	for (p = 0; p < dev->num_ports; ++p)
 		for (q = 0; q <= 1; ++q)
 			if (dev->send_agent[p][q])
 				ib_unregister_mad_agent(dev->send_agent[p][q]);
@@ -327,7 +327,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
 	struct ib_mad_agent *agent;
 	int p, q;
 
-	for (p = 0; p < dev->dev->caps.num_ports; ++p) {
+	for (p = 0; p < dev->num_ports; ++p) {
 		for (q = 0; q <= 1; ++q) {
 			agent = dev->send_agent[p][q];
 			dev->send_agent[p][q] = NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index a3c2851c054..2e80f8f47b0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -574,7 +574,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.owner		= THIS_MODULE;
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
 	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
-	ibdev->ib_dev.phys_port_cnt	= dev->caps.num_ports;
+	ibdev->num_ports = 0;
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+		ibdev->num_ports++;
+	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
 	ibdev->ib_dev.num_comp_vectors	= 1;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
@@ -691,7 +694,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
 	int p;
 
-	for (p = 1; p <= dev->caps.num_ports; ++p)
+	for (p = 1; p <= ibdev->num_ports; ++p)
 		mlx4_CLOSE_PORT(dev, p);
 
 	mlx4_ib_mad_cleanup(ibdev);
@@ -706,6 +709,10 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 			  enum mlx4_dev_event event, int port)
 {
 	struct ib_event ibev;
+	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+
+	if (port > ibdev->num_ports)
+		return;
 
 	switch (event) {
 	case MLX4_DEV_EVENT_PORT_UP:
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6e2b0dc21b6..9974e886b8d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -162,6 +162,7 @@ struct mlx4_ib_ah {
 struct mlx4_ib_dev {
 	struct ib_device	ib_dev;
 	struct mlx4_dev	       *dev;
+	int			num_ports;
 	void __iomem	       *uar_map;
 
 	struct mlx4_uar		priv_uar;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index baa01deb243..39167a797f9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -451,6 +451,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
 {
+	int qpn;
 	int err;
 
 	mutex_init(&qp->mutex);
@@ -545,9 +546,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		}
 	}
 
-	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
+	if (sqpn) {
+		qpn = sqpn;
+	} else {
+		err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+		if (err)
+			goto err_wrid;
+	}
+
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
 	if (err)
-		goto err_wrid;
+		goto err_qpn;
 
 	/*
 	 * Hardware wants QPN written in big-endian order (after
@@ -560,6 +569,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 	return 0;
 
+err_qpn:
+	if (!sqpn)
+		mlx4_qp_release_range(dev->dev, qpn, 1);
+
 err_wrid:
 	if (pd->uobject) {
 		if (!init_attr->srq)
@@ -655,6 +668,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 	mlx4_ib_unlock_cqs(send_cq, recv_cq);
 
 	mlx4_qp_free(dev->dev, &qp->mqp);
+
+	if (!is_sqp(dev, qp))
+		mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+
 	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 
 	if (is_user) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 68ba5c3482e..e0c7dfabf2b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -507,6 +507,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev);
 void ipoib_drain_cq(struct net_device *dev);
 
 void ipoib_set_ethtool_ops(struct net_device *dev);
+int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
 
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 66af5c1a76e..e9795f60e5d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -42,6 +42,13 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
 	strncpy(drvinfo->driver, "ipoib", sizeof(drvinfo->driver) - 1);
 }
 
+static u32 ipoib_get_rx_csum(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return test_bit(IPOIB_FLAG_CSUM, &priv->flags) &&
+		!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
 static int ipoib_get_coalesce(struct net_device *dev,
 			      struct ethtool_coalesce *coal)
 {
@@ -129,7 +136,7 @@ static void ipoib_get_ethtool_stats(struct net_device *dev,
 
 static const struct ethtool_ops ipoib_ethtool_ops = {
 	.get_drvinfo		= ipoib_get_drvinfo,
-	.get_tso		= ethtool_op_get_tso,
+	.get_rx_csum		= ipoib_get_rx_csum,
 	.get_coalesce		= ipoib_get_coalesce,
 	.set_coalesce		= ipoib_set_coalesce,
 	.get_flags		= ethtool_op_get_flags,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0e748aeeae9..28eb6f03c58 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -685,10 +685,6 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
-	init_timer(&priv->poll_timer);
-	priv->poll_timer.function = ipoib_ib_tx_timer_func;
-	priv->poll_timer.data = (unsigned long)dev;
-
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
 	return 0;
@@ -906,6 +902,9 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		return -ENODEV;
 	}
 
+	setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+		    (unsigned long) dev);
+
 	if (dev->flags & IFF_UP) {
 		if (ipoib_ib_dev_open(dev)) {
 			ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c0ee514396d..fddded7900d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1173,11 +1173,48 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }
 
+int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
+{
+	struct ib_device_attr *device_attr;
+	int result = -ENOMEM;
+
+	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
+	if (!device_attr) {
+		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
+		       hca->name, sizeof *device_attr);
+		return result;
+	}
+
+	result = ib_query_device(hca, device_attr);
+	if (result) {
+		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
+		       hca->name, result);
+		kfree(device_attr);
+		return result;
+	}
+	priv->hca_caps = device_attr->device_cap_flags;
+
+	kfree(device_attr);
+
+	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
+		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
+		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+	}
+
+	if (lro)
+		priv->dev->features |= NETIF_F_LRO;
+
+	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
+		priv->dev->features |= NETIF_F_TSO;
+
+	return 0;
+}
+
+
 static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
-	struct ib_device_attr *device_attr;
 	struct ib_port_attr attr;
 	int result = -ENOMEM;
 
@@ -1206,31 +1243,9 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto device_init_failed;
 	}
 
-	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
-	if (!device_attr) {
-		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
-		       hca->name, sizeof *device_attr);
+	result = ipoib_set_dev_features(priv, hca);
+	if (result)
 		goto device_init_failed;
-	}
-
-	result = ib_query_device(hca, device_attr);
-	if (result) {
-		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
-		       hca->name, result);
-		kfree(device_attr);
-		goto device_init_failed;
-	}
-	priv->hca_caps = device_attr->device_cap_flags;
-
-	kfree(device_attr);
-
-	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
-		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
-		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
-	}
-
-	if (lro)
-		priv->dev->features |= NETIF_F_LRO;
 
 	/*
 	 * Set the full membership bit, so that we join the right
@@ -1266,9 +1281,6 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto event_failed;
 	}
 
-	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
-		priv->dev->features |= NETIF_F_TSO;
-
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index b08eb56196d..2cf1a408871 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -93,6 +93,10 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
 	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
 
+	result = ipoib_set_dev_features(priv, ppriv->ca);
+	if (result)
+		goto device_init_failed;
+
 	priv->pkey = pkey;
 
 	memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 6e1e8c624f9..8317fdef169 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -219,7 +219,7 @@ config TOUCHSCREEN_ATMEL_TSADCC
 
 config TOUCHSCREEN_UCB1400
 	tristate "Philips UCB1400 touchscreen"
-	select AC97_BUS
+	depends on AC97_BUS
 	depends on UCB1400_CORE
 	help
 	  This enables support for the Philips UCB1400 touchscreen interface.
diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c
index c38d4e0f95c..a89700e7ace 100644
--- a/drivers/input/touchscreen/hp680_ts_input.c
+++ b/drivers/input/touchscreen/hp680_ts_input.c
@@ -5,7 +5,7 @@
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/adc.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 #define MODNAME "hp680_ts_input"
 
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index e3e40427e00..c7ff1e11ea8 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -179,7 +179,7 @@ config LEDS_TRIGGER_TIMER
 
 config LEDS_TRIGGER_IDE_DISK
 	bool "LED IDE Disk Trigger"
-	depends on LEDS_TRIGGERS && BLK_DEV_IDEDISK
+	depends on LEDS_TRIGGERS && IDE_GD_ATA
 	help
 	  This allows LEDs to be controlled by IDE disk activity.
 	  If unsure, say Y.
diff --git a/drivers/leds/leds-hp6xx.c b/drivers/leds/leds-hp6xx.c
index 844d5979c90..e8fb1baf8a5 100644
--- a/drivers/leds/leds-hp6xx.c
+++ b/drivers/leds/leds-hp6xx.c
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/leds.h>
 #include <asm/hd64461.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 
 static void hp6xxled_green_set(struct led_classdev *led_cdev,
 			       enum led_brightness value)
diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c
index c7c770c2898..aa1ff524256 100644
--- a/drivers/media/dvb/ttpci/av7110.c
+++ b/drivers/media/dvb/ttpci/av7110.c
@@ -36,7 +36,6 @@
 #include <linux/fs.h>
 #include <linux/timer.h>
 #include <linux/poll.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/smp_lock.h>
 
 #include <linux/kernel.h>
@@ -52,6 +51,7 @@
 #include <linux/i2c.h>
 #include <linux/kthread.h>
 #include <asm/unaligned.h>
+#include <asm/byteorder.h>
 
 #include <asm/system.h>
 
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index fa8be0731a3..a4b1708fafe 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index bc29436e8a3..3733b2afec5 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -55,6 +55,7 @@
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 6e291bf8237..5263913e0c6 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1044,7 +1044,6 @@ static int mspro_block_read_attributes(struct memstick_dev *card)
 
 		s_attr->dev_attr.attr.name = s_attr->name;
 		s_attr->dev_attr.attr.mode = S_IRUGO;
-		s_attr->dev_attr.attr.owner = THIS_MODULE;
 		s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
 
 		if (!rc)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 5eff8ad834d..5a79d2d4cda 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -52,6 +52,8 @@ config HTC_PASIC3
 
 config UCB1400_CORE
 	tristate "Philips UCB1400 Core driver"
+	depends on AC97_BUS
+	depends on GPIOLIB
 	help
 	  This enables support for the Philips UCB1400 core functions.
 	  The UCB1400 is an AC97 audio codec.
@@ -59,6 +61,20 @@ config UCB1400_CORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ucb1400_core.
 
+config TWL4030_CORE
+	bool "Texas Instruments TWL4030/TPS659x0 Support"
+	depends on I2C=y && GENERIC_HARDIRQS && (ARCH_OMAP2 || ARCH_OMAP3)
+	help
+	  Say yes here if you have TWL4030 family chip on your board.
+	  This core driver provides register access and IRQ handling
+	  facilities, and registers devices for the various functions
+	  so that function-specific drivers can bind to them.
+
+	  These multi-function chips are found on many OMAP2 and OMAP3
+	  boards, providing power management, RTC, GPIO, keypad, a
+	  high speed USB OTG transceiver, an audio codec (on most
+	  versions) and many other features.
+
 config MFD_TMIO
 	bool
 	default n
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 759b1fe1c89..0acefe8aff8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -17,6 +17,8 @@ wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 
+obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
+
 obj-$(CONFIG_MFD_CORE)		+= mfd-core.o
 
 obj-$(CONFIG_MCP)		+= mcp-core.o
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index ba5aa200827..e4c0db4dc7b 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -123,7 +123,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 					irqnr = asic->irq_base +
 						(ASIC3_GPIOS_PER_BANK * bank)
 						+ i;
-					desc = irq_desc + irqnr;
+					desc = irq_to_desc(irqnr);
 					desc->handle_irq(irqnr, desc);
 					if (asic->irq_bothedge[bank] & bit)
 						asic3_irq_flip_edge(asic, base,
@@ -136,7 +136,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 		for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
 			/* They start at bit 4 and go up */
 			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
-				desc = irq_desc + asic->irq_base + i;
+				desc = irq_to_desc(asic->irq_base + i);
 				desc->handle_irq(asic->irq_base + i,
 						 desc);
 			}
diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
new file mode 100644
index 00000000000..b57326ae464
--- /dev/null
+++ b/drivers/mfd/da903x.c
@@ -0,0 +1,563 @@
+/*
+ * Base driver for Dialog Semiconductor DA9030/DA9034
+ *
+ * Copyright (C) 2008 Compulab, Ltd.
+ * 	Mike Rapoport <mike@compulab.co.il>
+ *
+ * Copyright (C) 2006-2008 Marvell International Ltd.
+ * 	Eric Miao <eric.miao@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/mfd/da903x.h>
+
+#define DA9030_CHIP_ID		0x00
+#define DA9030_EVENT_A		0x01
+#define DA9030_EVENT_B		0x02
+#define DA9030_EVENT_C		0x03
+#define DA9030_STATUS		0x04
+#define DA9030_IRQ_MASK_A	0x05
+#define DA9030_IRQ_MASK_B	0x06
+#define DA9030_IRQ_MASK_C	0x07
+#define DA9030_SYS_CTRL_A	0x08
+#define DA9030_SYS_CTRL_B	0x09
+#define DA9030_FAULT_LOG	0x0a
+
+#define DA9034_CHIP_ID		0x00
+#define DA9034_EVENT_A		0x01
+#define DA9034_EVENT_B		0x02
+#define DA9034_EVENT_C		0x03
+#define DA9034_EVENT_D		0x04
+#define DA9034_STATUS_A		0x05
+#define DA9034_STATUS_B		0x06
+#define DA9034_IRQ_MASK_A	0x07
+#define DA9034_IRQ_MASK_B	0x08
+#define DA9034_IRQ_MASK_C	0x09
+#define DA9034_IRQ_MASK_D	0x0a
+#define DA9034_SYS_CTRL_A	0x0b
+#define DA9034_SYS_CTRL_B	0x0c
+#define DA9034_FAULT_LOG	0x0d
+
+struct da903x_chip;
+
+struct da903x_chip_ops {
+	int	(*init_chip)(struct da903x_chip *);
+	int	(*unmask_events)(struct da903x_chip *, unsigned int events);
+	int	(*mask_events)(struct da903x_chip *, unsigned int events);
+	int	(*read_events)(struct da903x_chip *, unsigned int *events);
+	int	(*read_status)(struct da903x_chip *, unsigned int *status);
+};
+
+struct da903x_chip {
+	struct i2c_client	*client;
+	struct device		*dev;
+	struct da903x_chip_ops	*ops;
+
+	int			type;
+	uint32_t		events_mask;
+
+	struct mutex		lock;
+	struct work_struct	irq_work;
+
+	struct blocking_notifier_head notifier_list;
+};
+
+static inline int __da903x_read(struct i2c_client *client,
+				int reg, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading at 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = (uint8_t)ret;
+	return 0;
+}
+
+static inline int __da903x_reads(struct i2c_client *client, int reg,
+				 int len, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_i2c_block_data(client, reg, len, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading from 0x%02x\n", reg);
+		return ret;
+	}
+	return 0;
+}
+
+static inline int __da903x_write(struct i2c_client *client,
+				 int reg, uint8_t val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writing 0x%02x to 0x%02x\n",
+				val, reg);
+		return ret;
+	}
+	return 0;
+}
+
+static inline int __da903x_writes(struct i2c_client *client, int reg,
+				  int len, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_i2c_block_data(client, reg, len, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writings to 0x%02x\n", reg);
+		return ret;
+	}
+	return 0;
+}
+
+int da903x_register_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+
+	chip->ops->unmask_events(chip, events);
+	return blocking_notifier_chain_register(&chip->notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(da903x_register_notifier);
+
+int da903x_unregister_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+
+	chip->ops->mask_events(chip, events);
+	return blocking_notifier_chain_unregister(&chip->notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(da903x_unregister_notifier);
+
+int da903x_write(struct device *dev, int reg, uint8_t val)
+{
+	return __da903x_write(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(da903x_write);
+
+int da903x_read(struct device *dev, int reg, uint8_t *val)
+{
+	return __da903x_read(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(da903x_read);
+
+int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if ((reg_val & bit_mask) == 0) {
+		reg_val |= bit_mask;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_set_bits);
+
+int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if (reg_val & bit_mask) {
+		reg_val &= ~bit_mask;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_clr_bits);
+
+int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+
+	ret = __da903x_read(chip->client, reg, &reg_val);
+	if (ret)
+		goto out;
+
+	if ((reg_val & mask) != val) {
+		reg_val = (reg_val & ~mask) | val;
+		ret = __da903x_write(chip->client, reg, reg_val);
+	}
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da903x_update);
+
+int da903x_query_status(struct device *dev, unsigned int sbits)
+{
+	struct da903x_chip *chip = dev_get_drvdata(dev);
+	unsigned int status = 0;
+
+	chip->ops->read_status(chip, &status);
+	return ((status & sbits) == sbits);
+}
+EXPORT_SYMBOL(da903x_query_status);
+
+static int __devinit da9030_init_chip(struct da903x_chip *chip)
+{
+	uint8_t chip_id;
+	int err;
+
+	err = __da903x_read(chip->client, DA9030_CHIP_ID, &chip_id);
+	if (err)
+		return err;
+
+	err = __da903x_write(chip->client, DA9030_SYS_CTRL_A, 0xE8);
+	if (err)
+		return err;
+
+	dev_info(chip->dev, "DA9030 (CHIP ID: 0x%02x) detected\n", chip_id);
+	return 0;
+}
+
+static int da9030_unmask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[3];
+
+	chip->events_mask &= ~events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+
+	return __da903x_writes(chip->client, DA9030_IRQ_MASK_A, 3, v);
+}
+
+static int da9030_mask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[3];
+
+	/* A set bit in events_mask keeps that event masked, so OR them in. */
+	chip->events_mask |= events;
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+
+	return __da903x_writes(chip->client, DA9030_IRQ_MASK_A, 3, v);
+}
+
+static int da9030_read_events(struct da903x_chip *chip, unsigned int *events)
+{
+	uint8_t v[3] = {0, 0, 0};
+	int ret;
+
+	ret = __da903x_reads(chip->client, DA9030_EVENT_A, 3, v);
+	if (ret < 0)
+		return ret;
+
+	*events = (v[2] << 16) | (v[1] << 8) | v[0];
+	return 0;
+}
+
+static int da9030_read_status(struct da903x_chip *chip, unsigned int *status)
+{
+	return __da903x_read(chip->client, DA9030_STATUS, (uint8_t *)status);
+}
+
+static int da9034_init_chip(struct da903x_chip *chip)
+{
+	uint8_t chip_id;
+	int err;
+
+	err = __da903x_read(chip->client, DA9034_CHIP_ID, &chip_id);
+	if (err)
+		return err;
+
+	err = __da903x_write(chip->client, DA9034_SYS_CTRL_A, 0xE8);
+	if (err)
+		return err;
+
+	/* avoid SRAM power off during sleep*/
+	__da903x_write(chip->client, 0x10, 0x07);
+	__da903x_write(chip->client, 0x11, 0xff);
+	__da903x_write(chip->client, 0x12, 0xff);
+
+	/* Enable the ONKEY power down functionality */
+	__da903x_write(chip->client, DA9034_SYS_CTRL_B, 0x20);
+	__da903x_write(chip->client, DA9034_SYS_CTRL_A, 0x60);
+
+	/* workaround to make LEDs work */
+	__da903x_write(chip->client, 0x90, 0x01);
+	__da903x_write(chip->client, 0xB0, 0x08);
+
+	/* make ADTV1 and SDTV1 effective */
+	__da903x_write(chip->client, 0x20, 0x00);
+
+	dev_info(chip->dev, "DA9034 (CHIP ID: 0x%02x) detected\n", chip_id);
+	return 0;
+}
+
+static int da9034_unmask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[4];
+
+	chip->events_mask &= ~events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+	v[3] = (chip->events_mask >> 24) & 0xff;
+
+	return __da903x_writes(chip->client, DA9034_IRQ_MASK_A, 4, v);
+}
+
+static int da9034_mask_events(struct da903x_chip *chip, unsigned int events)
+{
+	uint8_t v[4];
+
+	chip->events_mask |= events;
+
+	v[0] = (chip->events_mask & 0xff);
+	v[1] = (chip->events_mask >> 8) & 0xff;
+	v[2] = (chip->events_mask >> 16) & 0xff;
+	v[3] = (chip->events_mask >> 24) & 0xff;
+
+	return __da903x_writes(chip->client, DA9034_IRQ_MASK_A, 4, v);
+}
+
+static int da9034_read_events(struct da903x_chip *chip, unsigned int *events)
+{
+	uint8_t v[4] = {0, 0, 0, 0};
+	int ret;
+
+	ret = __da903x_reads(chip->client, DA9034_EVENT_A, 4, v);
+	if (ret < 0)
+		return ret;
+
+	*events = (v[3] << 24) | (v[2] << 16) | (v[1] << 8) | v[0];
+	return 0;
+}
+
+static int da9034_read_status(struct da903x_chip *chip, unsigned int *status)
+{
+	uint8_t v[2] = {0, 0};
+	int ret = 0;
+
+	ret = __da903x_reads(chip->client, DA9034_STATUS_A, 2, v);
+	if (ret)
+		return ret;
+
+	*status = (v[1] << 8) | v[0];
+	return 0;
+}
+
+static void da903x_irq_work(struct work_struct *work)
+{
+	struct da903x_chip *chip =
+		container_of(work, struct da903x_chip, irq_work);
+	unsigned int events = 0;
+
+	while (1) {
+		if (chip->ops->read_events(chip, &events))
+			break;
+
+		events &= ~chip->events_mask;
+		if (events == 0)
+			break;
+
+		blocking_notifier_call_chain(
+				&chip->notifier_list, events, NULL);
+	}
+	enable_irq(chip->client->irq);
+}
+
+static irqreturn_t da903x_irq_handler(int irq, void *data)
+{
+	struct da903x_chip *chip = data;
+
+	/* Keep the line quiet until da903x_irq_work() re-enables it. */
+	disable_irq_nosync(irq);
+	(void)schedule_work(&chip->irq_work);
+	return IRQ_HANDLED;
+}
+
+static struct da903x_chip_ops da903x_ops[] = {
+	[0] = {
+		.init_chip	= da9030_init_chip,
+		.unmask_events	= da9030_unmask_events,
+		.mask_events	= da9030_mask_events,
+		.read_events	= da9030_read_events,
+		.read_status	= da9030_read_status,
+	},
+	[1] = {
+		.init_chip	= da9034_init_chip,
+		.unmask_events	= da9034_unmask_events,
+		.mask_events	= da9034_mask_events,
+		.read_events	= da9034_read_events,
+		.read_status	= da9034_read_status,
+	}
+};
+
+static const struct i2c_device_id da903x_id_table[] = {
+	{ "da9030", 0 },
+	{ "da9034", 1 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, da903x_id_table);
+
+static int __devexit __remove_subdev(struct device *dev, void *unused)
+{
+	platform_device_unregister(to_platform_device(dev));
+	return 0;
+}
+
+static int __devexit da903x_remove_subdevs(struct da903x_chip *chip)
+{
+	return device_for_each_child(chip->dev, NULL, __remove_subdev);
+}
+
+/* Register one child platform device per subdev; undo them all on failure. */
+static int __devinit da903x_add_subdevs(struct da903x_chip *chip,
+					struct da903x_platform_data *pdata)
+{
+	struct da903x_subdev_info *subdev;
+	struct platform_device *pdev;
+	int i, ret = 0;
+
+	for (i = 0; !ret && i < pdata->num_subdevs; i++) {
+		subdev = &pdata->subdevs[i];
+		pdev = platform_device_alloc(subdev->name, subdev->id);
+		if (!pdev) {
+			ret = -ENOMEM;
+			break;
+		}
+		pdev->dev.parent = chip->dev;
+		pdev->dev.platform_data = subdev->platform_data;
+		ret = platform_device_add(pdev);
+		if (ret)	/* never registered: drop our reference */
+			platform_device_put(pdev);
+	}
+	if (ret)
+		da903x_remove_subdevs(chip);
+	return ret;
+}
+
+static int __devinit da903x_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct da903x_platform_data *pdata = client->dev.platform_data;
+	struct da903x_chip *chip;
+	unsigned int tmp;
+	int ret;
+
+	chip = kzalloc(sizeof(struct da903x_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->dev = &client->dev;
+	chip->ops = &da903x_ops[id->driver_data];
+
+	mutex_init(&chip->lock);
+	INIT_WORK(&chip->irq_work, da903x_irq_work);
+	BLOCKING_INIT_NOTIFIER_HEAD(&chip->notifier_list);
+
+	i2c_set_clientdata(client, chip);
+
+	ret = chip->ops->init_chip(chip);
+	if (ret)
+		goto out_free_chip;
+
+	/* mask and clear all IRQs */
+	chip->events_mask = 0xffffffff;
+	chip->ops->mask_events(chip, chip->events_mask);
+	chip->ops->read_events(chip, &tmp);
+
+	ret = request_irq(client->irq, da903x_irq_handler,
+			IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+			"da903x", chip);
+	if (ret) {
+		dev_err(&client->dev, "failed to request irq %d\n",
+				client->irq);
+		goto out_free_chip;
+	}
+
+	ret = da903x_add_subdevs(chip, pdata);
+	if (ret)
+		goto out_free_irq;
+
+	return 0;
+
+out_free_irq:
+	free_irq(client->irq, chip);
+out_free_chip:
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit da903x_remove(struct i2c_client *client)
+{
+	struct da903x_chip *chip = i2c_get_clientdata(client);
+	da903x_remove_subdevs(chip);
+	free_irq(client->irq, chip);	/* handler must not outlive chip */
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver da903x_driver = {
+	.driver	= {
+		.name	= "da903x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= da903x_probe,
+	.remove		= __devexit_p(da903x_remove),
+	.id_table	= da903x_id_table,
+};
+
+static int __init da903x_init(void)
+{
+	return i2c_add_driver(&da903x_driver);
+}
+module_init(da903x_init);
+
+static void __exit da903x_exit(void)
+{
+	i2c_del_driver(&da903x_driver);
+}
+module_exit(da903x_exit);
+
+MODULE_DESCRIPTION("PMIC Driver for Dialog Semiconductor DA9030/DA9034");
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>, "
+	      "Mike Rapoport <mike@compulab.co.il>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 6be43172dc6..1a4d04664d6 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -112,7 +112,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
 		/* Run irq handler */
 		pr_debug("got IRQ %d\n", irqpin);
 		irq = ei->irq_start + irqpin;
-		desc = &irq_desc[irq];
+		desc = irq_to_desc(irq);
 		desc->handle_irq(irq, desc);
 	}
 }
@@ -289,7 +289,7 @@ static int __init egpio_probe(struct platform_device *pdev)
 	ei->base_addr = ioremap_nocache(res->start, res->end - res->start);
 	if (!ei->base_addr)
 		goto fail;
-	pr_debug("EGPIO phys=%08x virt=%p\n", res->start, ei->base_addr);
+	pr_debug("EGPIO phys=%08x virt=%p\n", (u32)res->start, ei->base_addr);
 
 	if ((pdata->bus_width != 16) && (pdata->bus_width != 32))
 		goto fail;
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 9c9c126ed33..6c0d1bec4b7 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -20,7 +20,7 @@ static int mfd_add_device(struct device *parent, int id,
 			  struct resource *mem_base,
 			  int irq_base)
 {
-	struct resource res[cell->num_resources];
+	struct resource *res;
 	struct platform_device *pdev;
 	int ret = -ENOMEM;
 	int r;
@@ -29,14 +29,17 @@ static int mfd_add_device(struct device *parent, int id,
 	if (!pdev)
 		goto fail_alloc;
 
+	res = kzalloc(sizeof(*res) * cell->num_resources, GFP_KERNEL);
+	if (!res)
+		goto fail_device;
+
 	pdev->dev.parent = parent;
 
 	ret = platform_device_add_data(pdev,
 			cell->platform_data, cell->data_size);
 	if (ret)
-		goto fail_device;
+		goto fail_res;
 
-	memset(res, 0, sizeof(res));
 	for (r = 0; r < cell->num_resources; r++) {
 		res[r].name = cell->resources[r].name;
 		res[r].flags = cell->resources[r].flags;
@@ -64,11 +67,15 @@ static int mfd_add_device(struct device *parent, int id,
 
 	ret = platform_device_add(pdev);
 	if (ret)
-		goto fail_device;
+		goto fail_res;
+
+	kfree(res);
 
 	return 0;
 
 /*	platform_device_del(pdev); */
+fail_res:
+	kfree(res);
 fail_device:
 	platform_device_put(pdev);
 fail_alloc:
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 7aebad4c06f..170f9d47c2f 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -623,8 +623,8 @@ unsigned long sm501_set_clock(struct device *dev,
 
 	sm501_sync_regs(sm);
 
-	dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n",
-		 gate, clock, mode);
+	dev_dbg(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n",
+		gate, clock, mode);
 
 	sm501_mdelay(sm, 16);
 	mutex_unlock(&sm->clock_lock);
@@ -742,7 +742,7 @@ static int sm501_register_device(struct sm501_devdata *sm,
 	int ret;
 
 	for (ptr = 0; ptr < pdev->num_resources; ptr++) {
-		printk("%s[%d] flags %08lx: %08llx..%08llx\n",
+		printk(KERN_DEBUG "%s[%d] flags %08lx: %08llx..%08llx\n",
 		       pdev->name, ptr,
 		       pdev->resource[ptr].flags,
 		       (unsigned long long)pdev->resource[ptr].start,
@@ -1374,31 +1374,31 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 static int sm501_plat_probe(struct platform_device *dev)
 {
 	struct sm501_devdata *sm;
-	int err;
+	int ret;
 
 	sm = kzalloc(sizeof(struct sm501_devdata), GFP_KERNEL);
 	if (sm == NULL) {
 		dev_err(&dev->dev, "no memory for device data\n");
-		err = -ENOMEM;
+		ret = -ENOMEM;
 		goto err1;
 	}
 
 	sm->dev = &dev->dev;
 	sm->pdev_id = dev->id;
-	sm->irq = platform_get_irq(dev, 0);
-	sm->io_res = platform_get_resource(dev, IORESOURCE_MEM, 1);
-	sm->mem_res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	sm->platdata = dev->dev.platform_data;
 
-	if (sm->irq < 0) {
+	ret = platform_get_irq(dev, 0);
+	if (ret < 0) {
 		dev_err(&dev->dev, "failed to get irq resource\n");
-		err = sm->irq;
 		goto err_res;
 	}
+	sm->irq = ret;
 
+	sm->io_res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	sm->mem_res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (sm->io_res == NULL || sm->mem_res == NULL) {
 		dev_err(&dev->dev, "failed to get IO resource\n");
-		err = -ENOENT;
+		ret = -ENOENT;
 		goto err_res;
 	}
 
@@ -1407,7 +1407,7 @@ static int sm501_plat_probe(struct platform_device *dev)
 
 	if (sm->regs_claim == NULL) {
 		dev_err(&dev->dev, "cannot claim registers\n");
-		err= -EBUSY;
+		ret = -EBUSY;
 		goto err_res;
 	}
 
@@ -1418,7 +1418,7 @@ static int sm501_plat_probe(struct platform_device *dev)
 
 	if (sm->regs == NULL) {
 		dev_err(&dev->dev, "cannot remap registers\n");
-		err = -EIO;
+		ret = -EIO;
 		goto err_claim;
 	}
 
@@ -1430,7 +1430,7 @@ static int sm501_plat_probe(struct platform_device *dev)
  err_res:
 	kfree(sm);
  err1:
-	return err;
+	return ret;
 
 }
 
@@ -1625,8 +1625,7 @@ static int sm501_pci_probe(struct pci_dev *dev,
 		goto err3;
 	}
 
-	sm->regs = ioremap(pci_resource_start(dev, 1),
-			   pci_resource_len(dev, 1));
+	sm->regs = pci_ioremap_bar(dev, 1);
 
 	if (sm->regs == NULL) {
 		dev_err(&dev->dev, "cannot remap registers\n");
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 49a0fffc02a..9f7024c0f8e 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -24,8 +24,10 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
@@ -56,6 +58,8 @@ struct t7l66xb {
 	spinlock_t		lock;
 
 	struct resource		rscr;
+	struct clk		*clk48m;
+	struct clk		*clk32k;
 	int			irq;
 	int			irq_base;
 };
@@ -65,13 +69,11 @@ struct t7l66xb {
 static int t7l66xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev = to_platform_device(mmc->dev.parent);
-	struct t7l66xb_platform_data   *pdata = dev->dev.platform_data;
 	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	unsigned long flags;
 	u8 dev_ctl;
 
-	if (pdata->enable_clk32k)
-		pdata->enable_clk32k(dev);
+	clk_enable(t7l66xb->clk32k);
 
 	spin_lock_irqsave(&t7l66xb->lock, flags);
 
@@ -87,7 +89,6 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc)
 static int t7l66xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev = to_platform_device(mmc->dev.parent);
-	struct t7l66xb_platform_data   *pdata = dev->dev.platform_data;
 	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	unsigned long flags;
 	u8 dev_ctl;
@@ -100,8 +101,7 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 
 	spin_unlock_irqrestore(&t7l66xb->lock, flags);
 
-	if (pdata->disable_clk32k)
-		pdata->disable_clk32k(dev);
+	clk_disable(t7l66xb->clk32k);
 
 	return 0;
 }
@@ -258,18 +258,22 @@ static void t7l66xb_detach_irq(struct platform_device *dev)
 #ifdef CONFIG_PM
 static int t7l66xb_suspend(struct platform_device *dev, pm_message_t state)
 {
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	struct t7l66xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
+	clk_disable(t7l66xb->clk48m);
 
 	return 0;
 }
 
 static int t7l66xb_resume(struct platform_device *dev)
 {
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
 	struct t7l66xb_platform_data *pdata = dev->dev.platform_data;
 
+	clk_enable(t7l66xb->clk48m);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
@@ -309,6 +313,19 @@ static int t7l66xb_probe(struct platform_device *dev)
 
 	t7l66xb->irq_base = pdata->irq_base;
 
+	t7l66xb->clk32k = clk_get(&dev->dev, "CLK_CK32K");
+	if (IS_ERR(t7l66xb->clk32k)) {
+		ret = PTR_ERR(t7l66xb->clk32k);
+		goto err_clk32k_get;
+	}
+
+	t7l66xb->clk48m = clk_get(&dev->dev, "CLK_CK48M");
+	if (IS_ERR(t7l66xb->clk48m)) {
+		ret = PTR_ERR(t7l66xb->clk48m);
+		clk_put(t7l66xb->clk32k);
+		goto err_clk48m_get;
+	}
+
 	rscr = &t7l66xb->rscr;
 	rscr->name = "t7l66xb-core";
 	rscr->start = iomem->start;
@@ -325,6 +342,8 @@ static int t7l66xb_probe(struct platform_device *dev)
 		goto err_ioremap;
 	}
 
+	clk_enable(t7l66xb->clk48m);
+
 	if (pdata && pdata->enable)
 		pdata->enable(dev);
 
@@ -359,9 +378,13 @@ static int t7l66xb_probe(struct platform_device *dev)
 	iounmap(t7l66xb->scr);
 err_ioremap:
 	release_resource(&t7l66xb->rscr);
-err_noirq:
 err_request_scr:
 	kfree(t7l66xb);
+	clk_put(t7l66xb->clk48m);
+err_clk48m_get:
+	clk_put(t7l66xb->clk32k);
+err_clk32k_get:
+err_noirq:
 	return ret;
 }
 
@@ -372,7 +395,8 @@ static int t7l66xb_remove(struct platform_device *dev)
 	int ret;
 
 	ret = pdata->disable(dev);
-
+	clk_disable(t7l66xb->clk48m);
+	clk_put(t7l66xb->clk48m);
 	t7l66xb_detach_irq(dev);
 	iounmap(t7l66xb->scr);
 	release_resource(&t7l66xb->rscr);
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index a22b21ac6cf..43222c12fec 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
@@ -24,18 +25,22 @@ enum {
 #ifdef CONFIG_PM
 static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state)
 {
-	struct tc6387xb_platform_data *pdata = platform_get_drvdata(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
+	clk_disable(clk32k);
 
 	return 0;
 }
 
 static int tc6387xb_resume(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *pdata = platform_get_drvdata(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
+	clk_enable(clk32k);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
@@ -51,10 +56,9 @@ static int tc6387xb_resume(struct platform_device *dev)
 static int tc6387xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct tc6387xb_platform_data *tc6387xb = dev->dev.platform_data;
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	if (tc6387xb->enable_clk32k)
-		tc6387xb->enable_clk32k(dev);
+	clk_enable(clk32k);
 
 	return 0;
 }
@@ -62,10 +66,9 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc)
 static int tc6387xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct tc6387xb_platform_data *tc6387xb = dev->dev.platform_data;
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	if (tc6387xb->disable_clk32k)
-		tc6387xb->disable_clk32k(dev);
+	clk_disable(clk32k);
 
 	return 0;
 }
@@ -102,14 +105,14 @@ static struct mfd_cell tc6387xb_cells[] = {
 
 static int tc6387xb_probe(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *data = platform_get_drvdata(dev);
+	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 	struct resource *iomem;
+	struct clk *clk32k;
 	int irq, ret;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!iomem) {
-		ret = -EINVAL;
-		goto err_resource;
+		return -EINVAL;
 	}
 
 	ret  = platform_get_irq(dev, 0);
@@ -118,8 +121,15 @@ static int tc6387xb_probe(struct platform_device *dev)
 	else
 		goto err_resource;
 
-	if (data && data->enable)
-		data->enable(dev);
+	clk32k = clk_get(&dev->dev, "CLK_CK32K");
+	if (IS_ERR(clk32k)) {
+		ret = PTR_ERR(clk32k);
+		goto err_resource;
+	}
+	platform_set_drvdata(dev, clk32k);
+
+	if (pdata && pdata->enable)
+		pdata->enable(dev);
 
 	printk(KERN_INFO "Toshiba tc6387xb initialised\n");
 
@@ -134,18 +144,19 @@ static int tc6387xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
+	clk_put(clk32k);
 err_resource:
 	return ret;
 }
 
 static int tc6387xb_remove(struct platform_device *dev)
 {
-	struct tc6387xb_platform_data *data = platform_get_drvdata(dev);
-
-	if (data && data->disable)
-		data->disable(dev);
+	struct clk *clk32k = platform_get_drvdata(dev);
 
-	/* FIXME - free the resources! */
+	mfd_remove_devices(&dev->dev);
+	clk_disable(clk32k);
+	clk_put(clk32k);
+	platform_set_drvdata(dev, NULL);
 
 	return 0;
 }
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index e4c1c788b5f..f856e9463a9 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -113,6 +113,8 @@ struct tc6393xb {
 enum {
 	TC6393XB_CELL_NAND,
 	TC6393XB_CELL_MMC,
+	TC6393XB_CELL_OHCI,
+	TC6393XB_CELL_FB,
 };
 
 /*--------------------------------------------------------------------------*/
@@ -170,6 +172,176 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = {
 	},
 };
 
+const static struct resource tc6393xb_ohci_resources[] = {
+	{
+		.start	= 0x3000,
+		.end	= 0x31ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x0300,
+		.end	= 0x03ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x010000,
+		.end	= 0x017fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x018000,
+		.end	= 0x01ffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_TC6393_OHCI,
+		.end	= IRQ_TC6393_OHCI,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct resource __devinitdata tc6393xb_fb_resources[] = {
+	{
+		.start	= 0x5000,
+		.end	= 0x51ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x0500,
+		.end	= 0x05ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= 0x100000,
+		.end	= 0x1fffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_TC6393_FB,
+		.end	= IRQ_TC6393_FB,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static int tc6393xb_ohci_enable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+	u8 fer;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr |= SCR_CCR_USBCK;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	fer = tmio_ioread8(tc6393xb->scr + SCR_FER);
+	fer |= SCR_FER_USBEN;
+	tmio_iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+static int tc6393xb_ohci_disable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+	u8 fer;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	fer = tmio_ioread8(tc6393xb->scr + SCR_FER);
+	fer &= ~SCR_FER_USBEN;
+	tmio_iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_USBCK;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+static int tc6393xb_fb_enable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_MCLK_MASK;
+	ccr |= SCR_CCR_MCLK_48;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+static int tc6393xb_fb_disable(struct platform_device *dev)
+{
+	struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
+	unsigned long flags;
+	u16 ccr;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	ccr = tmio_ioread16(tc6393xb->scr + SCR_CCR);
+	ccr &= ~SCR_CCR_MCLK_MASK;
+	ccr |= SCR_CCR_MCLK_OFF;
+	tmio_iowrite16(ccr, tc6393xb->scr + SCR_CCR);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+
+int tc6393xb_lcd_set_power(struct platform_device *fb, bool on)
+{
+	struct platform_device *dev = to_platform_device(fb->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+	u8 fer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	fer = ioread8(tc6393xb->scr + SCR_FER);
+	if (on)
+		fer |= SCR_FER_SLCDEN;
+	else
+		fer &= ~SCR_FER_SLCDEN;
+	iowrite8(fer, tc6393xb->scr + SCR_FER);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(tc6393xb_lcd_set_power);
+
+int tc6393xb_lcd_mode(struct platform_device *fb,
+					const struct fb_videomode *mode) {
+	struct platform_device *dev = to_platform_device(fb->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc6393xb->lock, flags);
+
+	iowrite16(mode->pixclock, tc6393xb->scr + SCR_PLL1CR + 0);
+	iowrite16(mode->pixclock >> 16, tc6393xb->scr + SCR_PLL1CR + 2);
+
+	spin_unlock_irqrestore(&tc6393xb->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(tc6393xb_lcd_mode);
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -182,6 +354,24 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
 	},
+	[TC6393XB_CELL_OHCI] = {
+		.name = "tmio-ohci",
+		.num_resources = ARRAY_SIZE(tc6393xb_ohci_resources),
+		.resources = tc6393xb_ohci_resources,
+		.enable = tc6393xb_ohci_enable,
+		.suspend = tc6393xb_ohci_disable,
+		.resume = tc6393xb_ohci_enable,
+		.disable = tc6393xb_ohci_disable,
+	},
+	[TC6393XB_CELL_FB] = {
+		.name = "tmio-fb",
+		.num_resources = ARRAY_SIZE(tc6393xb_fb_resources),
+		.resources = tc6393xb_fb_resources,
+		.enable = tc6393xb_fb_enable,
+		.suspend = tc6393xb_fb_disable,
+		.resume = tc6393xb_fb_enable,
+		.disable = tc6393xb_fb_disable,
+	},
 };
 
 /*--------------------------------------------------------------------------*/
@@ -369,41 +559,12 @@ static void tc6393xb_detach_irq(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
-static int tc6393xb_hw_init(struct platform_device *dev)
-{
-	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
-	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
-	int i;
-
-	iowrite8(tc6393xb->suspend_state.fer,	tc6393xb->scr + SCR_FER);
-	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
-	iowrite16(tc6393xb->suspend_state.ccr,	tc6393xb->scr + SCR_CCR);
-	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
-		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
-		  BIT(15),			tc6393xb->scr + SCR_MCR);
-	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
-	iowrite8(0,				tc6393xb->scr + SCR_IRR);
-	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
-
-	for (i = 0; i < 3; i++) {
-		iowrite8(tc6393xb->suspend_state.gpo_dsr[i],
-					tc6393xb->scr + SCR_GPO_DSR(i));
-		iowrite8(tc6393xb->suspend_state.gpo_doecr[i],
-					tc6393xb->scr + SCR_GPO_DOECR(i));
-		iowrite8(tc6393xb->suspend_state.gpi_bcr[i],
-					tc6393xb->scr + SCR_GPI_BCR(i));
-	}
-
-	return 0;
-}
-
 static int __devinit tc6393xb_probe(struct platform_device *dev)
 {
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb;
 	struct resource *iomem, *rscr;
 	int ret, temp;
-	int i;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!iomem)
@@ -458,21 +619,16 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	if (ret)
 		goto err_enable;
 
-	tc6393xb->suspend_state.fer = 0;
-
-	for (i = 0; i < 3; i++) {
-		tc6393xb->suspend_state.gpo_dsr[i] =
-			(tcpd->scr_gpo_dsr >> (8 * i)) & 0xff;
-		tc6393xb->suspend_state.gpo_doecr[i] =
-			(tcpd->scr_gpo_doecr >> (8 * i)) & 0xff;
-	}
-
-	tc6393xb->suspend_state.ccr = SCR_CCR_UNK1 |
-					SCR_CCR_HCLK_48;
-
-	ret = tc6393xb_hw_init(dev);
-	if (ret)
-		goto err_hw_init;
+	iowrite8(0,				tc6393xb->scr + SCR_FER);
+	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
+	iowrite16(SCR_CCR_UNK1 | SCR_CCR_HCLK_48,
+						tc6393xb->scr + SCR_CCR);
+	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
+		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
+		  BIT(15),			tc6393xb->scr + SCR_MCR);
+	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
+	iowrite8(0,				tc6393xb->scr + SCR_IRR);
+	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
 
 	printk(KERN_INFO "Toshiba tc6393xb revision %d at 0x%08lx, irq %d\n",
 			tmio_ioread8(tc6393xb->scr + SCR_REVID),
@@ -488,16 +644,33 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 
 	tc6393xb_attach_irq(dev);
 
+	if (tcpd->setup) {
+		ret = tcpd->setup(dev);
+		if (ret)
+			goto err_setup;
+	}
+
 	tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data;
 	tc6393xb_cells[TC6393XB_CELL_NAND].platform_data =
 		&tc6393xb_cells[TC6393XB_CELL_NAND];
 	tc6393xb_cells[TC6393XB_CELL_NAND].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]);
+
 	tc6393xb_cells[TC6393XB_CELL_MMC].platform_data =
 		&tc6393xb_cells[TC6393XB_CELL_MMC];
 	tc6393xb_cells[TC6393XB_CELL_MMC].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_MMC]);
 
+	tc6393xb_cells[TC6393XB_CELL_OHCI].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_OHCI];
+	tc6393xb_cells[TC6393XB_CELL_OHCI].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_OHCI]);
+
+	tc6393xb_cells[TC6393XB_CELL_FB].driver_data = tcpd->fb_data;
+	tc6393xb_cells[TC6393XB_CELL_FB].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_FB];
+	tc6393xb_cells[TC6393XB_CELL_FB].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_FB]);
 
 	ret = mfd_add_devices(&dev->dev, dev->id,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
@@ -506,12 +679,15 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
+	if (tcpd->teardown)
+		tcpd->teardown(dev);
+
+err_setup:
 	tc6393xb_detach_irq(dev);
 
 err_gpio_add:
 	if (tc6393xb->gpio.base != -1)
 		temp = gpiochip_remove(&tc6393xb->gpio);
-err_hw_init:
 	tcpd->disable(dev);
 err_clk_enable:
 	clk_disable(tc6393xb->clk);
@@ -535,6 +711,10 @@ static int __devexit tc6393xb_remove(struct platform_device *dev)
 	int ret;
 
 	mfd_remove_devices(&dev->dev);
+
+	if (tcpd->teardown)
+		tcpd->teardown(dev);
+
 	tc6393xb_detach_irq(dev);
 
 	if (tc6393xb->gpio.base != -1) {
@@ -585,15 +765,37 @@ static int tc6393xb_resume(struct platform_device *dev)
 	struct tc6393xb_platform_data *tcpd = dev->dev.platform_data;
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
 	int ret;
+	int i;
 
 	clk_enable(tc6393xb->clk);
 
 	ret = tcpd->resume(dev);
-
 	if (ret)
 		return ret;
 
-	return tc6393xb_hw_init(dev);
+	if (!tcpd->resume_restore)
+		return 0;
+
+	iowrite8(tc6393xb->suspend_state.fer,	tc6393xb->scr + SCR_FER);
+	iowrite16(tcpd->scr_pll2cr,		tc6393xb->scr + SCR_PLL2CR);
+	iowrite16(tc6393xb->suspend_state.ccr,	tc6393xb->scr + SCR_CCR);
+	iowrite16(SCR_MCR_RDY_OPENDRAIN | SCR_MCR_RDY_UNK | SCR_MCR_RDY_EN |
+		  SCR_MCR_INT_OPENDRAIN | SCR_MCR_INT_UNK | SCR_MCR_INT_EN |
+		  BIT(15),			tc6393xb->scr + SCR_MCR);
+	iowrite16(tcpd->scr_gper,		tc6393xb->scr + SCR_GPER);
+	iowrite8(0,				tc6393xb->scr + SCR_IRR);
+	iowrite8(0xbf,				tc6393xb->scr + SCR_IMR);
+
+	for (i = 0; i < 3; i++) {
+		iowrite8(tc6393xb->suspend_state.gpo_dsr[i],
+					tc6393xb->scr + SCR_GPO_DSR(i));
+		iowrite8(tc6393xb->suspend_state.gpo_doecr[i],
+					tc6393xb->scr + SCR_GPO_DOECR(i));
+		iowrite8(tc6393xb->suspend_state.gpi_bcr[i],
+					tc6393xb->scr + SCR_GPI_BCR(i));
+	}
+
+	return 0;
 }
 #else
 #define tc6393xb_suspend NULL
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
new file mode 100644
index 00000000000..dd843c4fbcc
--- /dev/null
+++ b/drivers/mfd/twl4030-core.c
@@ -0,0 +1,806 @@
+/*
+ * twl4030_core.c - driver for TWL4030/TPS659x0 PM and audio CODEC devices
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Modifications to defer interrupt handling to a kernel thread:
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Code cleanup and modifications to IRQ handler.
+ * by syed khasim <x0khasim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * The TWL4030 "Triton 2" is one of a family of multi-function "Power
+ * Management and System Companion Device" chips originally designed for
+ * use in OMAP2 and OMAP 3 based systems.  Its control interfaces use I2C,
+ * often at around 3 Mbit/sec, including for interrupt handling.
+ *
+ * This driver core provides genirq support for the interrupts emitted
+ * by the various modules, and exports register access primitives.
+ *
+ * FIXME this driver currently requires use of the first interrupt line
+ * (and associated registers).
+ */
+
+#define DRIVER_NAME			"twl4030"
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+#define twl_has_bci()		true
+#else
+#define twl_has_bci()		false
+#endif
+
+#if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE)
+#define twl_has_keypad()	true
+#else
+#define twl_has_keypad()	false
+#endif
+
+#if defined(CONFIG_GPIO_TWL4030) || defined(CONFIG_GPIO_TWL4030_MODULE)
+#define twl_has_gpio()	true
+#else
+#define twl_has_gpio()	false
+#endif
+
+#if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
+#define twl_has_madc()	true
+#else
+#define twl_has_madc()	false
+#endif
+
+#if defined(CONFIG_RTC_DRV_TWL4030) || defined(CONFIG_RTC_DRV_TWL4030_MODULE)
+#define twl_has_rtc()	true
+#else
+#define twl_has_rtc()	false
+#endif
+
+#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE)
+#define twl_has_usb()	true
+#else
+#define twl_has_usb()	false
+#endif
+
+
+/* Triton Core internal information (BEGIN) */
+
+/* Last - for index max*/
+#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
+
+#define TWL4030_NUM_SLAVES		4
+
+
+/* Base Address defns for twl4030_map[] */
+
+/* subchip/slave 0 - USB ID */
+#define TWL4030_BASEADD_USB		0x0000
+
+/* subchip/slave 1 - AUD ID */
+#define TWL4030_BASEADD_AUDIO_VOICE	0x0000
+#define TWL4030_BASEADD_GPIO		0x0098
+#define TWL4030_BASEADD_INTBR		0x0085
+#define TWL4030_BASEADD_PIH		0x0080
+#define TWL4030_BASEADD_TEST		0x004C
+
+/* subchip/slave 2 - AUX ID */
+#define TWL4030_BASEADD_INTERRUPTS	0x00B9
+#define TWL4030_BASEADD_LED		0x00EE
+#define TWL4030_BASEADD_MADC		0x0000
+#define TWL4030_BASEADD_MAIN_CHARGE	0x0074
+#define TWL4030_BASEADD_PRECHARGE	0x00AA
+#define TWL4030_BASEADD_PWM0		0x00F8
+#define TWL4030_BASEADD_PWM1		0x00FB
+#define TWL4030_BASEADD_PWMA		0x00EF
+#define TWL4030_BASEADD_PWMB		0x00F1
+#define TWL4030_BASEADD_KEYPAD		0x00D2
+
+/* subchip/slave 3 - POWER ID */
+#define TWL4030_BASEADD_BACKUP		0x0014
+#define TWL4030_BASEADD_INT		0x002E
+#define TWL4030_BASEADD_PM_MASTER	0x0036
+#define TWL4030_BASEADD_PM_RECEIVER	0x005B
+#define TWL4030_BASEADD_RTC		0x001C
+#define TWL4030_BASEADD_SECURED_REG	0x0000
+
+/* Triton Core internal information (END) */
+
+
+/* Few power values */
+#define R_CFG_BOOT			0x05
+#define R_PROTECT_KEY			0x0E
+
+/* access control values for R_PROTECT_KEY */
+#define KEY_UNLOCK1			0xce
+#define KEY_UNLOCK2			0xec
+#define KEY_LOCK			0x00
+
+/* some fields in R_CFG_BOOT */
+#define HFCLK_FREQ_19p2_MHZ		(1 << 0)
+#define HFCLK_FREQ_26_MHZ		(2 << 0)
+#define HFCLK_FREQ_38p4_MHZ		(3 << 0)
+#define HIGH_PERF_SQ			(1 << 3)
+
+
+/*----------------------------------------------------------------------*/
+
+/* is driver active, bound to a chip? */
+static bool inuse;
+
+/* Structure for each TWL4030 Slave */
+struct twl4030_client {
+	struct i2c_client *client;
+	u8 address;
+
+	/* max numb of i2c_msg required is for read =2 */
+	struct i2c_msg xfer_msg[2];
+
+	/* To lock access to xfer_msg */
+	struct mutex xfer_lock;
+};
+
+static struct twl4030_client twl4030_modules[TWL4030_NUM_SLAVES];
+
+
+/* mapping the module id to slave id and base address */
+struct twl4030mapping {
+	unsigned char sid;	/* Slave ID */
+	unsigned char base;	/* base address */
+};
+
+static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+	/*
+	 * NOTE:  don't change this table without updating the
+	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
+	 * so they continue to match the order in this table.
+	 */
+
+	{ 0, TWL4030_BASEADD_USB },
+
+	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
+	{ 1, TWL4030_BASEADD_GPIO },
+	{ 1, TWL4030_BASEADD_INTBR },
+	{ 1, TWL4030_BASEADD_PIH },
+	{ 1, TWL4030_BASEADD_TEST },
+
+	{ 2, TWL4030_BASEADD_KEYPAD },
+	{ 2, TWL4030_BASEADD_MADC },
+	{ 2, TWL4030_BASEADD_INTERRUPTS },
+	{ 2, TWL4030_BASEADD_LED },
+	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
+	{ 2, TWL4030_BASEADD_PRECHARGE },
+	{ 2, TWL4030_BASEADD_PWM0 },
+	{ 2, TWL4030_BASEADD_PWM1 },
+	{ 2, TWL4030_BASEADD_PWMA },
+	{ 2, TWL4030_BASEADD_PWMB },
+
+	{ 3, TWL4030_BASEADD_BACKUP },
+	{ 3, TWL4030_BASEADD_INT },
+	{ 3, TWL4030_BASEADD_PM_MASTER },
+	{ 3, TWL4030_BASEADD_PM_RECEIVER },
+	{ 3, TWL4030_BASEADD_RTC },
+	{ 3, TWL4030_BASEADD_SECURED_REG },
+};
+
+/*----------------------------------------------------------------------*/
+
+/* Exported Functions */
+
+/**
+ * twl4030_i2c_write - Writes a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes+1 containing data to write
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * IMPORTANT: for 'value' parameter: Allocate value num_bytes+1 and
+ * valid data starts at Offset 1.
+ *
+ * Returns the result of operation - 0 is success
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+{
+	int ret;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/*
+	 * [MSG1]: fill the register address data
+	 * fill the data Tx buffer
+	 */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = num_bytes + 1;
+	msg->flags = 0;
+	msg->buf = value;
+	/* over write the first byte of buffer with the register address */
+	*value = twl4030_map[mod_no].base + reg;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2cTransfer returns num messages.translate it pls.. */
+	if (ret >= 0)
+		ret = 0;
+	return ret;
+}
+EXPORT_SYMBOL(twl4030_i2c_write);
+
+/**
+ * twl4030_i2c_read - Reads a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes containing data to be read
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * Returns result of operation - 0 is success else failure.
+ */
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+{
+	int ret;
+	u8 val;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/* [MSG1] fill the register address data */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = 1;
+	msg->flags = 0;	/* write: send the register address */
+	val = twl4030_map[mod_no].base + reg;
+	msg->buf = &val;
+	/* [MSG2] fill the data rx buffer */
+	msg = &twl->xfer_msg[1];
+	msg->addr = twl->address;
+	msg->flags = I2C_M_RD;	/* Read the register value */
+	msg->len = num_bytes;	/* only n bytes */
+	msg->buf = value;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 2);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2c_transfer returns the message count; report success as 0 */
+	if (ret >= 0)
+		ret = 0;
+	return ret;
+}
+EXPORT_SYMBOL(twl4030_i2c_read);
+
+/**
+ * twl4030_i2c_write_u8 - Writes a 8 bit register in TWL4030
+ * @mod_no: module number
+ * @value: the value to be written 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
+{
+
+	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
+	u8 temp_buffer[2] = { 0 };
+	/* offset 1 contains the data */
+	temp_buffer[1] = value;
+	return twl4030_i2c_write(mod_no, temp_buffer, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_write_u8);
+
+/**
+ * twl4030_i2c_read_u8 - Reads a 8 bit register from TWL4030
+ * @mod_no: module number
+ * @value: the value read 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
+{
+	return twl4030_i2c_read(mod_no, value, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_read_u8);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Create platform devices for the configured child modules; returns 0 or
+ * a negative error code.  NOTE: the first 8 IRQs after pdata->irq_base are
+ * for the PIH, the next for the PWR_INT SIH, as set up by twl_init_irq().
+ */
+
+static int add_children(struct twl4030_platform_data *pdata)
+{
+	struct platform_device	*pdev = NULL;
+	struct twl4030_client	*twl = NULL;
+	int			status = 0;
+
+	if (twl_has_bci() && pdata->bci) {
+		twl = &twl4030_modules[3];
+
+		pdev = platform_device_alloc("twl4030_bci", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc bci dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			status = platform_device_add_data(pdev, pdata->bci,
+					sizeof(*pdata->bci));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add bci data, %d\n", status);
+				goto err;
+			}
+		}
+
+		if (status == 0) {
+			struct resource r = {
+				.start = pdata->irq_base + 8 + 1,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create bci dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_gpio() && pdata->gpio) {
+		twl = &twl4030_modules[1];
+
+		pdev = platform_device_alloc("twl4030_gpio", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc gpio dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		/* more driver model init */
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			/* device_init_wakeup(&pdev->dev, 1); */
+
+			status = platform_device_add_data(pdev, pdata->gpio,
+					sizeof(*pdata->gpio));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add gpio data, %d\n", status);
+				goto err;
+			}
+		}
+
+		/* GPIO module IRQ */
+		if (status == 0) {
+			struct resource	r = {
+				.start = pdata->irq_base + 0,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create gpio dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_keypad() && pdata->keypad) {
+		pdev = platform_device_alloc("twl4030_keypad", -1);
+		if (pdev) {
+			twl = &twl4030_modules[2];
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->keypad,
+					sizeof(*pdata->keypad));
+			if (status < 0) {
+				dev_dbg(&twl->client->dev,
+					"can't add keypad data, %d\n",
+					status);
+				platform_device_put(pdev);
+				goto err;
+			}
+			status = platform_device_add(pdev);
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+						"can't create keypad dev, %d\n",
+						status);
+				goto err;
+			}
+		} else {
+			pr_debug("%s: can't alloc keypad dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+	}
+
+	if (twl_has_madc() && pdata->madc) {
+		pdev = platform_device_alloc("twl4030_madc", -1);
+		if (pdev) {
+			twl = &twl4030_modules[2];
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->madc,
+					sizeof(*pdata->madc));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add madc data, %d\n",
+					status);
+				goto err;
+			}
+			status = platform_device_add(pdev);
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+						"can't create madc dev, %d\n",
+						status);
+				goto err;
+			}
+		} else {
+			pr_debug("%s: can't alloc madc dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+	}
+
+	if (twl_has_rtc()) {
+		twl = &twl4030_modules[3];
+
+		pdev = platform_device_alloc("twl4030_rtc", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc rtc dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+		} else {
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+		}
+
+		/*
+		 * REVISIT platform_data here currently might make use of
+		 * "msecure" line ... but for now we just expect board
+		 * setup to tell the chip "we are secure" at all times.
+		 * Eventually, Linux might become more aware of such
+		 * HW security concerns, and "least privilege".
+		 */
+
+		/* RTC module IRQ */
+		if (status == 0) {
+			struct resource	r = {
+				.start = pdata->irq_base + 8 + 3,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create rtc dev, %d\n",
+					status);
+			goto err;
+		}
+	}
+
+	if (twl_has_usb() && pdata->usb) {
+		twl = &twl4030_modules[0];
+
+		pdev = platform_device_alloc("twl4030_usb", -1);
+		if (!pdev) {
+			pr_debug("%s: can't alloc usb dev\n", DRIVER_NAME);
+			status = -ENOMEM;
+			goto err;
+		}
+
+		if (status == 0) {
+			pdev->dev.parent = &twl->client->dev;
+			device_init_wakeup(&pdev->dev, 1);
+			status = platform_device_add_data(pdev, pdata->usb,
+					sizeof(*pdata->usb));
+			if (status < 0) {
+				platform_device_put(pdev);
+				dev_dbg(&twl->client->dev,
+					"can't add usb data, %d\n",
+					status);
+				goto err;
+			}
+		}
+
+		if (status == 0) {
+			struct resource r = {
+				.start = pdata->irq_base + 8 + 2,
+				.flags = IORESOURCE_IRQ,
+			};
+
+			status = platform_device_add_resources(pdev, &r, 1);
+		}
+
+		if (status == 0)
+			status = platform_device_add(pdev);
+
+		if (status < 0) {
+			platform_device_put(pdev);
+			dev_dbg(&twl->client->dev,
+					"can't create usb dev, %d\n",
+					status);
+		}
+	}
+
+err:
+	if (status)
+		pr_err("failed to add twl4030's children (status %d)\n", status);
+	return status;
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * These three functions initialize the on-chip clock framework,
+ * letting it generate the right frequencies for USB, MADC, and
+ * other purposes.
+ */
+static inline int __init protect_pm_master(void)
+{
+	int e = 0;
+
+	e = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_LOCK,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static inline int __init unprotect_pm_master(void)
+{
+	int e = 0;
+
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK1,
+			R_PROTECT_KEY);
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK2,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static void __init clocks_init(void)
+{
+	int e = 0;
+	struct clk *osc;
+	u32 rate;
+	u8 ctrl = HFCLK_FREQ_26_MHZ;
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+	if (cpu_is_omap2430())
+		osc = clk_get(NULL, "osc_ck");
+	else
+		osc = clk_get(NULL, "osc_sys_ck");
+#else
+	/* REVISIT for non-OMAP systems, pass the clock rate from
+	 * board init code, using platform_data.
+	 */
+	osc = ERR_PTR(-EIO);
+#endif
+	if (IS_ERR(osc)) {
+		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+				"using bootloader value (unknown osc rate)\n");
+		return;
+	}
+
+	rate = clk_get_rate(osc);
+	clk_put(osc);
+
+	switch (rate) {
+	case 19200000:
+		ctrl = HFCLK_FREQ_19p2_MHZ;
+		break;
+	case 26000000:
+		ctrl = HFCLK_FREQ_26_MHZ;
+		break;
+	case 38400000:
+		ctrl = HFCLK_FREQ_38p4_MHZ;
+		break;
+	}
+
+	ctrl |= HIGH_PERF_SQ;
+	e |= unprotect_pm_master();
+	/* effect->MADC+USB ck en */
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
+	e |= protect_pm_master();
+
+	if (e < 0)
+		pr_err("%s: clock init err [%d]\n", DRIVER_NAME, e);
+}
+
+/*----------------------------------------------------------------------*/
+
+int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
+int twl_exit_irq(void);
+
+static int twl4030_remove(struct i2c_client *client)
+{
+	unsigned i;
+	int status;
+
+	status = twl_exit_irq();
+	if (status < 0)
+		return status;
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		if (twl->client && twl->client != client)
+			i2c_unregister_device(twl->client);
+		twl4030_modules[i].client = NULL;
+	}
+	inuse = false;
+	return 0;
+}
+
+/* NOTE:  this driver only handles a single twl4030/tps659x0 chip */
+static int
+twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int				status;
+	unsigned			i;
+	struct twl4030_platform_data	*pdata = client->dev.platform_data;
+
+	if (!pdata) {
+		dev_dbg(&client->dev, "no platform data?\n");
+		return -EINVAL;
+	}
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) {
+		dev_dbg(&client->dev, "can't talk I2C?\n");
+		return -EIO;
+	}
+
+	if (inuse) {
+		dev_dbg(&client->dev, "driver is already in use\n");
+		return -EBUSY;
+	}
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		twl->address = client->addr + i;
+		if (i == 0)
+			twl->client = client;
+		else {
+			twl->client = i2c_new_dummy(client->adapter,
+					twl->address);
+			if (!twl->client) {
+				/* twl->client is NULL here; log via parent */
+				dev_err(&client->dev,
+					"can't attach client %d\n", i);
+				status = -ENOMEM;
+				goto fail;
+			}
+			strlcpy(twl->client->name, id->name,
+					sizeof(twl->client->name));
+		}
+		mutex_init(&twl->xfer_lock);
+	}
+	inuse = true;
+
+	/* setup clock framework */
+	clocks_init();
+
+	/* Maybe init the T2 Interrupt subsystem */
+	if (client->irq && pdata->irq_base
+			&& pdata->irq_end > pdata->irq_base) {
+		status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
+		if (status < 0)
+			goto fail;
+	}
+
+	status = add_children(pdata);
+fail:
+	if (status < 0)
+		twl4030_remove(client);
+	return status;
+}
+
+static const struct i2c_device_id twl4030_ids[] = {
+	{ "twl4030", 0 },	/* "Triton 2" */
+	{ "tps65950", 0 },	/* catalog version of twl4030 */
+	{ "tps65930", 0 },	/* fewer LDOs and DACs; no charger */
+	{ "tps65920", 0 },	/* fewer LDOs; no codec or charger */
+	{ "twl5030", 0 },	/* T2 updated */
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(i2c, twl4030_ids);
+
+/* One Client Driver , 4 Clients */
+static struct i2c_driver twl4030_driver = {
+	.driver.name	= DRIVER_NAME,
+	.id_table	= twl4030_ids,
+	.probe		= twl4030_probe,
+	.remove		= twl4030_remove,
+};
+
+static int __init twl4030_init(void)
+{
+	return i2c_add_driver(&twl4030_driver);
+}
+subsys_initcall(twl4030_init);
+
+static void __exit twl4030_exit(void)
+{
+	i2c_del_driver(&twl4030_driver);
+}
+module_exit(twl4030_exit);
+
+MODULE_AUTHOR("Texas Instruments, Inc.");
+MODULE_DESCRIPTION("I2C Core interface for TWL4030");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
new file mode 100644
index 00000000000..fae868a8d49
--- /dev/null
+++ b/drivers/mfd/twl4030-irq.c
@@ -0,0 +1,743 @@
+/*
+ * twl4030-irq.c - TWL4030/TPS659x0 irq support
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Modifications to defer interrupt handling to a kernel thread:
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Code cleanup and modifications to IRQ handler.
+ * by syed khasim <x0khasim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kthread.h>
+
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * TWL4030 IRQ handling has two stages in hardware, and thus in software.
+ * The Primary Interrupt Handler (PIH) stage exposes status bits saying
+ * which Secondary Interrupt Handler (SIH) stage is raising an interrupt.
+ * SIH modules are more traditional IRQ components, which support per-IRQ
+ * enable/disable and trigger controls; they do most of the work.
+ *
+ * These chips are designed to support IRQ handling from two different
+ * I2C masters.  Each has a dedicated IRQ line, and dedicated IRQ status
+ * and mask registers in the PIH and SIH modules.
+ *
+ * We set up IRQs starting at a platform-specified base, always starting
+ * with PIH and the SIH for PWR_INT and then usually adding GPIO:
+ *	base + 0  .. base + 7	PIH
+ *	base + 8  .. base + 15	SIH for PWR_INT
+ *	base + 16 .. base + 33	SIH for GPIO
+ */
+
+/* PIH register offsets */
+#define REG_PIH_ISR_P1			0x01
+#define REG_PIH_ISR_P2			0x02
+#define REG_PIH_SIR			0x03	/* for testing */
+
+
+/* Linux could (eventually) use either IRQ line */
+static int irq_line;
+
+struct sih {				/* static description of one SIH register bank */
+	char	name[8];		/* short label used in log messages */
+	u8	module;			/* module id */
+	u8	control_offset;		/* for SIH_CTRL */
+	bool	set_cor;		/* init writes the COR bit to SIH_CTRL */
+
+	u8	bits;			/* valid in isr/imr */
+	u8	bytes_ixr;		/* bytelen of ISR/IMR/SIR; 0 = bank is skipped */
+
+	u8	edr_offset;		/* first EDR (edge trigger) register */
+	u8	bytes_edr;		/* bytelen of EDR */
+
+	/* SIR ignored -- set interrupt, for testing only */
+	struct irq_data {
+		u8	isr_offset;
+		u8	imr_offset;
+	} mask[2];			/* indexed by irq line (0 or 1) */
+	/* + 2 bytes padding */
+};
+
+#define SIH_INITIALIZER(modname, nbits) /* struct sih fields for a bank with regular register names */ \
+	.module		= TWL4030_MODULE_ ## modname, \
+	.control_offset = TWL4030_ ## modname ## _SIH_CTRL, \
+	.bits		= nbits, \
+	.bytes_ixr	= DIV_ROUND_UP(nbits, 8), \
+	.edr_offset	= TWL4030_ ## modname ## _EDR, \
+	.bytes_edr	= DIV_ROUND_UP((2*(nbits)), 8), /* two EDR bits per irq */ \
+	.mask = { { \
+		.isr_offset	= TWL4030_ ## modname ## _ISR1, \
+		.imr_offset	= TWL4030_ ## modname ## _IMR1, \
+	}, \
+	{ \
+		.isr_offset	= TWL4030_ ## modname ## _ISR2, \
+		.imr_offset	= TWL4030_ ## modname ## _IMR2, \
+	}, },
+
+/* register naming policies are inconsistent ... */
+#define TWL4030_INT_PWR_EDR		TWL4030_INT_PWR_EDR1
+#define TWL4030_MODULE_KEYPAD_KEYP	TWL4030_MODULE_KEYPAD
+#define TWL4030_MODULE_INT_PWR		TWL4030_MODULE_INT
+
+
+/* Order in this table matches order in PIH_ISR.  That is,
+ * BIT(n) in PIH_ISR is sih_modules[n].  Entries with bytes_ixr == 0
+ * (USB) are skipped by the init code. */
+static const struct sih sih_modules[6] = {
+	[0] = {
+		.name		= "gpio",
+		.module		= TWL4030_MODULE_GPIO,
+		.control_offset	= REG_GPIO_SIH_CTRL,
+		.set_cor	= true,
+		.bits		= TWL4030_GPIO_MAX,
+		.bytes_ixr	= 3,
+		/* Note: *all* of these IRQs default to no-trigger */
+		.edr_offset	= REG_GPIO_EDR1,
+		.bytes_edr	= 5,
+		.mask = { {
+			.isr_offset	= REG_GPIO_ISR1A,
+			.imr_offset	= REG_GPIO_IMR1A,
+		}, {
+			.isr_offset	= REG_GPIO_ISR1B,
+			.imr_offset	= REG_GPIO_IMR1B,
+		}, },
+	},
+	[1] = {
+		.name		= "keypad",
+		.set_cor	= true,
+		SIH_INITIALIZER(KEYPAD_KEYP, 4)
+	},
+	[2] = {
+		.name		= "bci",
+		.module		= TWL4030_MODULE_INTERRUPTS,
+		.control_offset	= TWL4030_INTERRUPTS_BCISIHCTRL,
+		.bits		= 12,
+		.bytes_ixr	= 2,
+		.edr_offset	= TWL4030_INTERRUPTS_BCIEDR1,
+		/* Note: most of these IRQs default to no-trigger */
+		.bytes_edr	= 3,
+		.mask = { {
+			.isr_offset	= TWL4030_INTERRUPTS_BCIISR1A,
+			.imr_offset	= TWL4030_INTERRUPTS_BCIIMR1A,
+		}, {
+			.isr_offset	= TWL4030_INTERRUPTS_BCIISR1B,
+			.imr_offset	= TWL4030_INTERRUPTS_BCIIMR1B,
+		}, },
+	},
+	[3] = {
+		.name		= "madc",
+		SIH_INITIALIZER(MADC, 4)
+	},
+	[4] = {
+		/* USB doesn't use the same SIH organization */
+		.name		= "usb",	/* all other fields stay zero */
+	},
+	[5] = {
+		.name		= "power",
+		.set_cor	= true,
+		SIH_INITIALIZER(INT_PWR, 8)
+	},
+		/* there are no SIH modules #6 or #7 ... */
+};
+
+#undef TWL4030_MODULE_KEYPAD_KEYP
+#undef TWL4030_MODULE_INT_PWR
+#undef TWL4030_INT_PWR_EDR
+
+/*----------------------------------------------------------------------*/
+
+static unsigned twl4030_irq_base;
+
+static struct completion irq_event;
+
+/* PIH demux thread; @data is the parent IRQ number.  Each irq_event wakeup
+ * reads PIH_ISR_P1, runs the handler of every SIH whose bit is set, then
+ * unmasks the parent.  Gives up after max_i2c_errors failed reads. */
+static int twl4030_irq_thread(void *data)
+{
+	long irq = (long)data;
+	irq_desc_t *desc = irq_desc + irq;
+	static unsigned i2c_errors;	/* never reset: counts over the thread's life */
+	static const unsigned max_i2c_errors = 100;
+
+	current->flags |= PF_NOFREEZE;
+
+	while (!kthread_should_stop()) {
+		int ret;
+		int module_irq;
+		u8 pih_isr;
+
+		/* Wait for IRQ, then read PIH irq status (also blocking) */
+		wait_for_completion_interruptible(&irq_event);
+
+		ret = twl4030_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
+					  REG_PIH_ISR_P1);
+		if (ret) {
+			pr_warning("twl4030: I2C error %d reading PIH ISR\n",
+					ret);
+			if (++i2c_errors >= max_i2c_errors) {
+				printk(KERN_ERR "Maximum I2C error count"
+						" exceeded.  Terminating %s.\n",
+						__func__);
+				break;
+			}
+			complete(&irq_event);	/* wake ourselves to retry the read */
+			continue;
+		}
+
+		/* these handlers deal with the relevant SIH irq status */
+		local_irq_disable();
+		for (module_irq = twl4030_irq_base;
+				pih_isr;
+				pih_isr >>= 1, module_irq++) {
+			if (pih_isr & 0x1) {
+				irq_desc_t *d = irq_desc + module_irq;
+
+				/* These can't be masked ... always warn
+				 * if we get any surprises.
+				 */
+				if (d->status & IRQ_DISABLED)
+					note_interrupt(module_irq, d,
+							IRQ_NONE);
+				else
+					d->handle_irq(module_irq, d);
+			}
+		}
+		local_irq_enable();
+
+		desc->chip->unmask(irq);	/* parent irq may fire again from here on */
+	}
+
+	return 0;
+}
+
+/*
+ * handle_twl4030_pih() is the desc->handle method for the twl4030 interrupt.
+ * This is a chained interrupt, so there is no desc->action method for it.
+ * Now we need to query the interrupt controller in the twl4030 to determine
+ * which module is generating the interrupt request.  However, we can't do i2c
+ * transactions in interrupt context, so we must defer that work to a kernel
+ * thread.  All we do here is acknowledge and mask the interrupt and wakeup
+ * the kernel thread (twl4030_irq_thread), which unmasks it when done.
+ */
+static void handle_twl4030_pih(unsigned int irq, irq_desc_t *desc)
+{
+	/* Acknowledge, clear *AND* mask the interrupt... NOTE(review): only ->ack() is called; presumably it also masks -- confirm */
+	desc->chip->ack(irq);
+	complete(&irq_event);
+}
+
+static struct task_struct *start_twl4030_irq_thread(long irq)
+{	/* returns the running PIH demux thread for @irq, or NULL on failure */
+	struct task_struct *thread;
+
+	init_completion(&irq_event);
+	thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq");
+	if (IS_ERR(thread))	/* kthread_run() fails with ERR_PTR(), never NULL */
+		pr_err("twl4030: could not create irq %ld thread!\n", irq);
+
+	return IS_ERR(thread) ? NULL : thread;	/* the caller tests for NULL */
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * twl4030_init_sih_modules() ... start from a known state where no
+ * IRQs will be coming in, and where we can quickly enable them then
+ * handle them as they arrive.  Mask all IRQs: maybe init SIH_CTRL.
+ * I2C errors are only logged; -EINVAL is returned just for a bad @line.
+ * NOTE:  we don't touch EDR registers here; they stay with hardware
+ * defaults or whatever the last value was.  Note that when both EDR
+ * bits for an IRQ are clear, that's as if its IMR bit is set...
+ */
+static int twl4030_init_sih_modules(unsigned line)
+{
+	const struct sih *sih;
+	u8 buf[4];	/* all-ones pattern written to the IMR (and ISR) registers */
+	int i;
+	int status;
+
+	/* line 0 == int1_n signal; line 1 == int2_n signal */
+	if (line > 1)
+		return -EINVAL;
+
+	irq_line = line;	/* remembered for later IMR/ISR accesses */
+
+	/* disable all interrupts on our line */
+	memset(buf, 0xff, sizeof buf);
+	sih = sih_modules;
+	for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) {
+
+		/* skip USB -- it's funky */
+		if (!sih->bytes_ixr)
+			continue;
+
+		status = twl4030_i2c_write(sih->module, buf,
+				sih->mask[line].imr_offset, sih->bytes_ixr);
+		if (status < 0)
+			pr_err("twl4030: err %d initializing %s %s\n",
+					status, sih->name, "IMR");
+
+		/* Maybe disable "exclusive" mode; buffer second pending irq;
+		 * set Clear-On-Read (COR) bit.
+		 *
+		 * NOTE that sometimes COR polarity is documented as being
+		 * inverted:  for MADC and BCI, COR=1 means "clear on write".
+		 * And for PWR_INT it's not documented...
+		 */
+		if (sih->set_cor) {
+			status = twl4030_i2c_write_u8(sih->module,
+					TWL4030_SIH_CTRL_COR_MASK,
+					sih->control_offset);
+			if (status < 0)
+				pr_err("twl4030: err %d initializing %s %s\n",
+						status, sih->name, "SIH_CTRL");
+		}
+	}
+
+	sih = sih_modules;
+	for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) {
+		u8 rxbuf[4];	/* ISR contents are read and then ignored */
+		int j;
+
+		/* skip USB */
+		if (!sih->bytes_ixr)
+			continue;
+
+		/* Clear pending interrupt status.  Either the read was
+		 * enough, or we need to write those bits.  Repeat, in
+		 * case an IRQ is pending (PENDDIS=0) ... that's not
+		 * uncommon with PWR_INT.PWRON.
+		 */
+		for (j = 0; j < 2; j++) {
+			status = twl4030_i2c_read(sih->module, rxbuf,
+				sih->mask[line].isr_offset, sih->bytes_ixr);
+			if (status < 0)
+				pr_err("twl4030: err %d initializing %s %s\n",
+					status, sih->name, "ISR");
+
+			if (!sih->set_cor)
+				status = twl4030_i2c_write(sih->module, buf,
+					sih->mask[line].isr_offset,
+					sih->bytes_ixr);
+			/* else COR=1 means read sufficed.
+			 * (for most SIH modules...)
+			 */
+		}
+	}
+
+	return 0;	/* I2C errors above were logged, not propagated */
+}
+
+static inline void activate_irq(int irq)
+{
+#ifdef CONFIG_ARM
+	/* Make @irq usable: ARM requires an extra step to clear
+	 * IRQ_NOREQUEST, which it sets on behalf of every irq_chip.
+	 * Also sets IRQ_NOPROBE. */
+	set_irq_flags(irq, IRQF_VALID);
+#else
+	/* same effect on other architectures */
+	set_irq_noprobe(irq);
+#endif
+}
+
+/*----------------------------------------------------------------------*/
+
+static DEFINE_SPINLOCK(sih_agent_lock);
+
+static struct workqueue_struct *wq;
+
+struct sih_agent {	/* run-time state of one SIH bank; imr* and edge_change change under sih_agent_lock */
+	int			irq_base;	/* first Linux irq of this bank */
+	const struct sih	*sih;		/* register layout of the bank */
+
+	u32			imr;		/* cached mask; set bit = masked */
+	bool			imr_change_pending;	/* imr awaits write-out */
+	struct work_struct	mask_work;	/* runs twl4030_sih_do_mask() */
+
+	u32			edge_change;	/* irqs whose trigger type changed */
+	struct work_struct	edge_work;	/* runs twl4030_sih_do_edge() */
+};
+
+static void twl4030_sih_do_mask(struct work_struct *work)
+{
+	struct sih_agent	*agent;
+	const struct sih	*sih = NULL;
+	union {
+		u32	word;
+		u8	bytes[4];
+	}			mask;
+	int			err;
+
+	agent = container_of(work, struct sih_agent, mask_work);
+
+	/* Snapshot the cached mask under the lock, shifted up one byte
+	 * because byte[0] gets overwritten as we write ...
+	 */
+	spin_lock_irq(&sih_agent_lock);
+	if (agent->imr_change_pending) {
+		agent->imr_change_pending = false;
+		mask.word = cpu_to_le32(agent->imr << 8);
+		sih = agent->sih;
+	}
+	spin_unlock_irq(&sih_agent_lock);
+	if (!sih)
+		return;		/* no mask change was pending */
+
+	/* write the whole mask ... simpler than subsetting it */
+	err = twl4030_i2c_write(sih->module, mask.bytes,
+			sih->mask[irq_line].imr_offset, sih->bytes_ixr);
+	if (err)
+		pr_err("twl4030: %s, %s --> %d\n", __func__,
+				"write", err);
+}
+
+static void twl4030_sih_do_edge(struct work_struct *work)
+{
+	struct sih_agent	*agent;
+	const struct sih	*sih;
+	u8			edr[6];
+	u32			changed;
+	int			err;
+
+	agent = container_of(work, struct sih_agent, edge_work);
+
+	/* claim every trigger change queued so far */
+	spin_lock_irq(&sih_agent_lock);
+	changed = agent->edge_change;
+	agent->edge_change = 0;
+	sih = changed ? agent->sih : NULL;
+	spin_unlock_irq(&sih_agent_lock);
+	if (!sih)
+		return;
+
+	/* Read, reserving first byte for write scratch.  Yes, this
+	 * could be cached for some speedup ... but be careful about
+	 * any processor on the other IRQ line, EDR registers are
+	 * shared.
+	 */
+	err = twl4030_i2c_read(sih->module, edr + 1,
+			sih->edr_offset, sih->bytes_edr);
+	if (err) {
+		pr_err("twl4030: %s, %s --> %d\n", __func__,
+				"read", err);
+		return;
+	}
+
+	/* Rewrite just the two-bit EDR field of each changed irq */
+	while (changed) {
+		int		bit = fls(changed) - 1;
+		struct irq_desc	*desc = irq_desc + agent->irq_base + bit;
+		u8		*cfg = &edr[1 + bit / 4];
+		int		shift = (bit % 4) * 2;
+
+		*cfg &= ~(0x03 << shift);
+
+		spin_lock_irq(&desc->lock);
+		if (desc->status & IRQ_TYPE_EDGE_RISING)
+			*cfg |= BIT(shift + 1);
+		if (desc->status & IRQ_TYPE_EDGE_FALLING)
+			*cfg |= BIT(shift + 0);
+		spin_unlock_irq(&desc->lock);
+
+		changed &= ~BIT(bit);
+	}
+
+	/* Write */
+	err = twl4030_i2c_write(sih->module, edr,
+			sih->edr_offset, sih->bytes_edr);
+	if (err)
+		pr_err("twl4030: %s, %s --> %d\n", __func__,
+				"write", err);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * All irq_chip methods get issued from code holding irq_desc[irq].lock,
+ * which can't perform the underlying I2C operations (because they sleep).
+ * So we must hand them off to a thread (workqueue) and cope with asynch
+ * completion, potentially including some re-ordering, of these requests.
+ */
+
+static void twl4030_sih_mask(unsigned irq)
+{
+	struct sih_agent *agent = get_irq_chip_data(irq);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&sih_agent_lock, irqflags);
+	agent->imr_change_pending = true;
+	agent->imr |= BIT(irq - agent->irq_base);	/* cached; I2C write is deferred */
+	queue_work(wq, &agent->mask_work);
+	spin_unlock_irqrestore(&sih_agent_lock, irqflags);
+}
+
+static void twl4030_sih_unmask(unsigned irq)
+{
+	struct sih_agent *agent = get_irq_chip_data(irq);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&sih_agent_lock, irqflags);
+	agent->imr_change_pending = true;
+	agent->imr &= ~BIT(irq - agent->irq_base);	/* cached; I2C write is deferred */
+	queue_work(wq, &agent->mask_work);
+	spin_unlock_irqrestore(&sih_agent_lock, irqflags);
+}
+
+static int twl4030_sih_set_type(unsigned irq, unsigned trigger)
+{
+	struct sih_agent *agent = get_irq_chip_data(irq);
+	struct irq_desc *d = irq_desc + irq;
+	unsigned long irqflags;
+
+	/* only edge triggers (rising, falling, both or none) are accepted */
+	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
+		return -EINVAL;
+	spin_lock_irqsave(&sih_agent_lock, irqflags);
+	if ((d->status & IRQ_TYPE_SENSE_MASK) != trigger) {
+		d->status = (d->status & ~IRQ_TYPE_SENSE_MASK) | trigger;
+		agent->edge_change |= BIT(irq - agent->irq_base);
+		/* the EDR update needs I2C: left to twl4030_sih_do_edge() */
+		queue_work(wq, &agent->edge_work);
+	}
+	spin_unlock_irqrestore(&sih_agent_lock, irqflags);
+	return 0;
+}
+
+static struct irq_chip twl4030_sih_irq_chip = {	/* irq_chip for the irqs of every SIH bank */
+	.name		= "twl4030",
+	.mask		= twl4030_sih_mask,
+	.unmask		= twl4030_sih_unmask,
+	.set_type	= twl4030_sih_set_type,
+};	/* .ack is copied from dummy_irq_chip in twl_init_irq() */
+
+/*----------------------------------------------------------------------*/
+
+static inline int sih_read_isr(const struct sih *sih)
+{
+	union {
+		u32 word;
+		u8 bytes[4];
+	} pending = { .word = 0 };
+	int err;
+
+	/* Read this bank's ISR for our irq line.  FIXME need retry-on-error ... */
+	err = twl4030_i2c_read(sih->module, pending.bytes,
+			sih->mask[irq_line].isr_offset, sih->bytes_ixr);
+	if (err < 0)
+		return err;
+
+	return le32_to_cpu(pending.word);	/* bytes form a little-endian word */
+}
+
+/*
+ * Generic handler for SIH interrupts ... we "know" this is called
+ * in task context; twl4030_irq_thread() calls handlers with local IRQs
+ * off, so they are re-enabled here around the (sleeping) I2C read. */
+static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
+{
+	struct sih_agent *agent = get_irq_data(irq);
+	const struct sih *sih = agent->sih;
+	int isr;
+
+	/* reading ISR acks the IRQs, using clear-on-read mode */
+	local_irq_enable();
+	isr = sih_read_isr(sih);
+	local_irq_disable();
+
+	if (isr < 0) {
+		pr_err("twl4030: %s SIH, read ISR error %d\n",
+			sih->name, isr);
+		/* REVISIT:  recover; eventually mask it all, etc */
+		return;
+	}
+
+	while (isr) {	/* dispatch pending bits, highest first */
+		irq = fls(isr);
+		irq--;	/* from here on @irq is a bit index within the bank */
+		isr &= ~BIT(irq);
+
+		if (irq < sih->bits)
+			generic_handle_irq(agent->irq_base + irq);
+		else
+			pr_err("twl4030: %s SIH, invalid ISR bit %d\n",
+				sih->name, irq);
+	}
+}
+
+static unsigned twl4030_irq_next;
+
+/* Hook up the SIH bank of @module (only set_cor banks are supported):
+ * returns the first IRQ used by this SIH bank, or negative errno
+ * (-EINVAL: no such bank, or its IRQs exceed NR_IRQS; -ENOMEM). */
+int twl4030_sih_setup(int module)
+{
+	int			sih_mod;
+	const struct sih	*sih = NULL;
+	struct sih_agent	*agent;
+	int			i, irq;
+	int			status = -EINVAL;
+	unsigned		irq_base = twl4030_irq_next;
+
+	/* only support modules with standard clear-on-read for now */
+	for (sih_mod = 0, sih = sih_modules;
+			sih_mod < ARRAY_SIZE(sih_modules);
+			sih_mod++, sih++) {
+		if (sih->module == module && sih->set_cor) {
+			if (!WARN((irq_base + sih->bits) > NR_IRQS,
+					"irq %d for %s too big\n",
+					irq_base + sih->bits,
+					sih->name))
+				status = 0;
+			break;
+		}
+	}
+	if (status < 0)
+		return status;
+
+	agent = kzalloc(sizeof *agent, GFP_KERNEL);
+	if (!agent)
+		return -ENOMEM;
+
+	status = 0;
+
+	agent->irq_base = irq_base;
+	agent->sih = sih;
+	agent->imr = ~0;	/* cached mask starts with every irq masked */
+	INIT_WORK(&agent->mask_work, twl4030_sih_do_mask);
+	INIT_WORK(&agent->edge_work, twl4030_sih_do_edge);
+
+	for (i = 0; i < sih->bits; i++) {
+		irq = irq_base + i;
+
+		set_irq_chip_and_handler(irq, &twl4030_sih_irq_chip,
+				handle_edge_irq);
+		set_irq_chip_data(irq, agent);
+		activate_irq(irq);
+	}
+
+	status = irq_base;
+	twl4030_irq_next += i;	/* the next bank starts after this one */
+
+	/* replace generic PIH handler (handle_simple_irq) */
+	irq = sih_mod + twl4030_irq_base;	/* PIH irq of this bank: table index + base */
+	set_irq_data(irq, agent);
+	set_irq_chained_handler(irq, handle_twl4030_sih);
+
+	pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name,
+			irq, irq_base, twl4030_irq_next - 1);
+
+	return status;
+}
+
+/* FIXME need a call to reverse twl4030_sih_setup() ... */
+
+
+/*----------------------------------------------------------------------*/
+
+/* FIXME pass in which interrupt line we'll use ... */
+#define twl_irq_line	0
+/* Install PIH irqs [irq_base, irq_end) plus PWR_INT, chained off irq_num */
+int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
+{
+	static struct irq_chip	twl4030_irq_chip;
+
+	int			status;
+	int			i;
+	struct task_struct	*task;
+
+	/*
+	 * Mask and clear all TWL4030 interrupts since initially we do
+	 * not have any TWL4030 module interrupt handlers present
+	 */
+	status = twl4030_init_sih_modules(twl_irq_line);
+	if (status < 0)
+		return status;
+
+	wq = create_singlethread_workqueue("twl4030-irqchip");
+	if (!wq) {
+		pr_err("twl4030: workqueue FAIL\n");
+		return -ESRCH;
+	}
+
+	twl4030_irq_base = irq_base;
+
+	/* install an irq handler for each of the SIH modules;
+	 * clone dummy irq_chip since PIH can't *do* anything
+	 */
+	twl4030_irq_chip = dummy_irq_chip;
+	twl4030_irq_chip.name = "twl4030";
+
+	twl4030_sih_irq_chip.ack = dummy_irq_chip.ack;
+
+	for (i = irq_base; i < irq_end; i++) {
+		set_irq_chip_and_handler(i, &twl4030_irq_chip,
+				handle_simple_irq);
+		activate_irq(i);
+	}
+	twl4030_irq_next = i;	/* SIH banks are allocated from irq_end upward */
+	pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", "PIH",
+			irq_num, irq_base, twl4030_irq_next - 1);
+
+	/* ... and the PWR_INT module ... */
+	status = twl4030_sih_setup(TWL4030_MODULE_INT);
+	if (status < 0) {
+		pr_err("twl4030: sih_setup PWR INT --> %d\n", status);
+		goto fail;
+	}
+
+	/* install an irq handler to demultiplex the TWL4030 interrupt */
+	task = start_twl4030_irq_thread(irq_num);
+	if (!task) {
+		pr_err("twl4030: irq thread FAIL\n");
+		status = -ESRCH;
+		goto fail;
+	}
+
+	set_irq_data(irq_num, task);
+	set_irq_chained_handler(irq_num, handle_twl4030_pih);
+
+	return status;	/* first PWR_INT irq, as returned by twl4030_sih_setup() */
+
+fail:	/* NOTE(review): twl4030_irq_base and any sih_agent are left behind here */
+	for (i = irq_base; i < irq_end; i++)
+		set_irq_chip_and_handler(i, NULL, NULL);
+	destroy_workqueue(wq);
+	wq = NULL;
+	return status;
+}
+
+int twl_exit_irq(void)
+{
+	/* FIXME undo twl_init_irq() */
+	if (!twl4030_irq_base)
+		return 0;	/* no irq base recorded, nothing to undo */
+
+	pr_err("twl4030: can't yet clean up IRQs?\n");
+	return -ENOSYS;
+}
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 25a7a5d08bc..0d47fb9e4b3 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -183,6 +183,9 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
 			(wm8350->reg_cache[i] & ~wm8350_reg_io_map[i].writable)
 			| src[i - reg];
 
+		/* Don't store volatile bits */
+		wm8350->reg_cache[i] &= ~wm8350_reg_io_map[i].vol;
+
 		src[i - reg] = cpu_to_be16(src[i - reg]);
 	}
 
@@ -1120,6 +1123,7 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int mode)
 			}
 			value = be16_to_cpu(value);
 			value &= wm8350_reg_io_map[i].readable;
+			value &= ~wm8350_reg_io_map[i].vol;
 			wm8350->reg_cache[i] = value;
 		} else
 			wm8350->reg_cache[i] = reg_map[i];
@@ -1128,7 +1132,6 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int mode)
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(wm8350_create_cache);
 
 /*
  * Register a client device.  This is non-fatal since there is no need to
@@ -1217,7 +1220,7 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 
 	mutex_init(&wm8350->irq_mutex);
 	INIT_WORK(&wm8350->irq_work, wm8350_irq_worker);
-	if (irq != NO_IRQ) {
+	if (irq) {
 		ret = request_irq(irq, wm8350_irq, 0,
 				  "wm8350", wm8350);
 		if (ret != 0) {
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index 5dabfb69ee5..4b7c24c519c 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -82,6 +82,7 @@ static struct key_entry hp_wmi_keymap[] = {
 	{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
 	{KE_KEY, 0x20e6, KEY_PROG1},
 	{KE_KEY, 0x2142, KEY_MEDIA},
+	{KE_KEY, 0x213b, KEY_INFO},
 	{KE_KEY, 0x231b, KEY_HELP},
 	{KE_END, 0}
 };
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 406989e992b..7a72e75d5c6 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -132,6 +132,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_hw_segs == 1) {
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index ae16d845d74..3b2085b5776 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -3,6 +3,9 @@
  *
  *  Copyright (C) 2004-2006 maintech GmbH, Thomas Kleffel <tk@maintech.de>
  *
+ * Current driver maintained by Ben Dooks and Simtec Electronics
+ *  Copyright (C) 2008 Simtec Electronics <ben-linux@fluff.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -13,6 +16,7 @@
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
 #include <linux/platform_device.h>
+#include <linux/cpufreq.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 
@@ -39,9 +43,9 @@ enum dbg_channels {
 	dbg_conf  = (1 << 8),
 };
 
-static const int dbgmap_err   = dbg_err | dbg_fail;
+static const int dbgmap_err   = dbg_fail;
 static const int dbgmap_info  = dbg_info | dbg_conf;
-static const int dbgmap_debug = dbg_debug;
+static const int dbgmap_debug = dbg_err | dbg_debug;
 
 #define dbg(host, channels, args...)		  \
 	do {					  \
@@ -189,7 +193,7 @@ static inline void clear_imask(struct s3cmci_host *host)
 }
 
 static inline int get_data_buffer(struct s3cmci_host *host,
-				  u32 *words, u32 **pointer)
+				  u32 *bytes, u32 **pointer)
 {
 	struct scatterlist *sg;
 
@@ -206,7 +210,7 @@ static inline int get_data_buffer(struct s3cmci_host *host,
 	}
 	sg = &host->mrq->data->sg[host->pio_sgptr];
 
-	*words = sg->length >> 2;
+	*bytes = sg->length;
 	*pointer = sg_virt(sg);
 
 	host->pio_sgptr++;
@@ -222,7 +226,7 @@ static inline u32 fifo_count(struct s3cmci_host *host)
 	u32 fifostat = readl(host->base + S3C2410_SDIFSTA);
 
 	fifostat &= S3C2410_SDIFSTA_COUNTMASK;
-	return fifostat >> 2;
+	return fifostat;
 }
 
 static inline u32 fifo_free(struct s3cmci_host *host)
@@ -230,13 +234,15 @@ static inline u32 fifo_free(struct s3cmci_host *host)
 	u32 fifostat = readl(host->base + S3C2410_SDIFSTA);
 
 	fifostat &= S3C2410_SDIFSTA_COUNTMASK;
-	return (63 - fifostat) >> 2;
+	return 63 - fifostat;
 }
 
 static void do_pio_read(struct s3cmci_host *host)
 {
 	int res;
 	u32 fifo;
+	u32 *ptr;
+	u32 fifo_words;
 	void __iomem *from_ptr;
 
 	/* write real prescaler to host, it might be set slow to fix */
@@ -245,8 +251,8 @@ static void do_pio_read(struct s3cmci_host *host)
 	from_ptr = host->base + host->sdidata;
 
 	while ((fifo = fifo_count(host))) {
-		if (!host->pio_words) {
-			res = get_data_buffer(host, &host->pio_words,
+		if (!host->pio_bytes) {
+			res = get_data_buffer(host, &host->pio_bytes,
 					      &host->pio_ptr);
 			if (res) {
 				host->pio_active = XFER_NONE;
@@ -259,26 +265,47 @@ static void do_pio_read(struct s3cmci_host *host)
 
 			dbg(host, dbg_pio,
 			    "pio_read(): new target: [%i]@[%p]\n",
-			    host->pio_words, host->pio_ptr);
+			    host->pio_bytes, host->pio_ptr);
 		}
 
 		dbg(host, dbg_pio,
 		    "pio_read(): fifo:[%02i] buffer:[%03i] dcnt:[%08X]\n",
-		    fifo, host->pio_words,
+		    fifo, host->pio_bytes,
 		    readl(host->base + S3C2410_SDIDCNT));
 
-		if (fifo > host->pio_words)
-			fifo = host->pio_words;
+		/* If we have reached the end of the block, we can
+		 * read a word and get 1 to 3 bytes.  If we are in the
+		 * middle of the block, we have to read full words,
+		 * otherwise we will write garbage, so round down to
+		 * a multiple of 4. */
+		if (fifo >= host->pio_bytes)
+			fifo = host->pio_bytes;
+		else
+			fifo -= fifo & 3;
 
-		host->pio_words -= fifo;
+		host->pio_bytes -= fifo;
 		host->pio_count += fifo;
 
-		while (fifo--)
-			*(host->pio_ptr++) = readl(from_ptr);
+		fifo_words = fifo >> 2;
+		ptr = host->pio_ptr;
+		while (fifo_words--)
+			*ptr++ = readl(from_ptr);
+		host->pio_ptr = ptr;
+
+		if (fifo & 3) {
+			u32 n = fifo & 3;
+			u32 data = readl(from_ptr);
+			u8 *p = (u8 *)host->pio_ptr;
+
+			while (n--) {
+				*p++ = data;
+				data >>= 8;
+			}
+		}
 	}
 
-	if (!host->pio_words) {
-		res = get_data_buffer(host, &host->pio_words, &host->pio_ptr);
+	if (!host->pio_bytes) {
+		res = get_data_buffer(host, &host->pio_bytes, &host->pio_ptr);
 		if (res) {
 			dbg(host, dbg_pio,
 			    "pio_read(): complete (no more buffers).\n");
@@ -298,12 +325,13 @@ static void do_pio_write(struct s3cmci_host *host)
 	void __iomem *to_ptr;
 	int res;
 	u32 fifo;
+	u32 *ptr;
 
 	to_ptr = host->base + host->sdidata;
 
 	while ((fifo = fifo_free(host))) {
-		if (!host->pio_words) {
-			res = get_data_buffer(host, &host->pio_words,
+		if (!host->pio_bytes) {
+			res = get_data_buffer(host, &host->pio_bytes,
 							&host->pio_ptr);
 			if (res) {
 				dbg(host, dbg_pio,
@@ -315,18 +343,27 @@ static void do_pio_write(struct s3cmci_host *host)
 
 			dbg(host, dbg_pio,
 			    "pio_write(): new source: [%i]@[%p]\n",
-			    host->pio_words, host->pio_ptr);
+			    host->pio_bytes, host->pio_ptr);
 
 		}
 
-		if (fifo > host->pio_words)
-			fifo = host->pio_words;
+		/* If we have reached the end of the block, we have to
+		 * write exactly the remaining number of bytes.  If we are
+		 * in the middle of the block, we have to write full
+		 * words, so round down to a multiple of 4. */
+		if (fifo >= host->pio_bytes)
+			fifo = host->pio_bytes;
+		else
+			fifo -= fifo & 3;
 
-		host->pio_words -= fifo;
+		host->pio_bytes -= fifo;
 		host->pio_count += fifo;
 
+		fifo = (fifo + 3) >> 2;
+		ptr = host->pio_ptr;
 		while (fifo--)
-			writel(*(host->pio_ptr++), to_ptr);
+			writel(*ptr++, to_ptr);
+		host->pio_ptr = ptr;
 	}
 
 	enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
@@ -349,9 +386,9 @@ static void pio_tasklet(unsigned long data)
 		clear_imask(host);
 		if (host->pio_active != XFER_NONE) {
 			dbg(host, dbg_err, "unfinished %s "
-			    "- pio_count:[%u] pio_words:[%u]\n",
+			    "- pio_count:[%u] pio_bytes:[%u]\n",
 			    (host->pio_active == XFER_READ) ? "read" : "write",
-			    host->pio_count, host->pio_words);
+			    host->pio_count, host->pio_bytes);
 
 			if (host->mrq->data)
 				host->mrq->data->error = -EINVAL;
@@ -812,11 +849,10 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data)
 		/* We cannot deal with unaligned blocks with more than
 		 * one block being transfered. */
 
-		if (data->blocks > 1)
+		if (data->blocks > 1) {
+			pr_warning("%s: can't do non-word sized block transfers (blksz %d)\n", __func__, data->blksz);
 			return -EINVAL;
-
-		/* No support yet for non-word block transfers. */
-		return -EINVAL;
+		}
 	}
 
 	while (readl(host->base + S3C2410_SDIDSTA) &
@@ -896,7 +932,7 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
 	BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
 
 	host->pio_sgptr = 0;
-	host->pio_words = 0;
+	host->pio_bytes = 0;
 	host->pio_count = 0;
 	host->pio_active = rw ? XFER_WRITE : XFER_READ;
 
@@ -1033,10 +1069,33 @@ static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		s3cmci_send_request(mmc);
 }
 
+static void s3cmci_set_clk(struct s3cmci_host *host, struct mmc_ios *ios)
+{
+	u32 psc = 0;
+
+	/* first prescaler whose rate is <= the requested clock, at most 255 */
+	while (psc < 255) {
+		host->real_rate = host->clk_rate / (host->clk_div * (psc + 1));
+		if (host->real_rate <= ios->clock)
+			break;
+		psc++;
+	}
+
+	if (psc > 255)
+		psc = 255;
+
+	host->prescaler = psc;
+	writel(host->prescaler, host->base + S3C2410_SDIPRE);
+
+	/* If requested clock is 0, real_rate will be 0, too */
+	if (!ios->clock)
+		host->real_rate = 0;
+}
+
 static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct s3cmci_host *host = mmc_priv(mmc);
-	u32 mci_psc, mci_con;
+	u32 mci_con;
 
 	/* Set the power state */
 
@@ -1074,23 +1133,7 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		break;
 	}
 
-	/* Set clock */
-	for (mci_psc = 0; mci_psc < 255; mci_psc++) {
-		host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1));
-
-		if (host->real_rate <= ios->clock)
-			break;
-	}
-
-	if (mci_psc > 255)
-		mci_psc = 255;
-
-	host->prescaler = mci_psc;
-	writel(host->prescaler, host->base + S3C2410_SDIPRE);
-
-	/* If requested clock is 0, real_rate will be 0, too */
-	if (ios->clock == 0)
-		host->real_rate = 0;
+	s3cmci_set_clk(host, ios);
 
 	/* Set CLOCK_ENABLE */
 	if (ios->clock)
@@ -1148,6 +1191,61 @@ static struct s3c24xx_mci_pdata s3cmci_def_pdata = {
 	 * checks. Any zero fields to ensure reaonable defaults are picked. */
 };
 
+#ifdef CONFIG_CPU_FREQ
+
+static int s3cmci_cpufreq_transition(struct notifier_block *nb,
+				     unsigned long val, void *data)
+{
+	struct s3cmci_host *host = container_of(nb, struct s3cmci_host,
+						freq_transition);
+	struct mmc_host *mmc = host->mmc;
+	unsigned long newclk = clk_get_rate(host->clk);
+	unsigned long flags;
+
+	/* Retune before the change when the clock goes up, and after
+	 * the change when it goes down; ignore everything else. */
+	if (val == CPUFREQ_PRECHANGE) {
+		if (newclk <= host->clk_rate)
+			return 0;
+	} else if (val != CPUFREQ_POSTCHANGE || newclk >= host->clk_rate) {
+		return 0;
+	}
+
+	spin_lock_irqsave(&mmc->lock, flags);
+	host->clk_rate = newclk;
+	/* skip the hardware write while powered off or unclocked */
+	if (mmc->ios.power_mode != MMC_POWER_OFF && mmc->ios.clock != 0)
+		s3cmci_set_clk(host, &mmc->ios);
+	spin_unlock_irqrestore(&mmc->lock, flags);
+
+	return 0;
+}
+
+static inline int s3cmci_cpufreq_register(struct s3cmci_host *host)
+{
+	host->freq_transition.notifier_call = s3cmci_cpufreq_transition;
+	/* result is passed through; s3cmci_probe() treats nonzero as failure */
+	return cpufreq_register_notifier(&host->freq_transition,
+					 CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+static inline void s3cmci_cpufreq_deregister(struct s3cmci_host *host)
+{	/* removes the notifier installed by s3cmci_cpufreq_register() */
+	cpufreq_unregister_notifier(&host->freq_transition,
+				    CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+#else
+static inline int s3cmci_cpufreq_register(struct s3cmci_host *host)
+{
+	return 0;	/* built without CONFIG_CPU_FREQ: nothing to register */
+}
+
+static inline void s3cmci_cpufreq_deregister(struct s3cmci_host *host)
+{	/* built without CONFIG_CPU_FREQ: nothing was registered */
+}
+#endif
+
 static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 {
 	struct s3cmci_host *host;
@@ -1298,10 +1396,16 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 	    (host->is2440?"2440":""),
 	    host->base, host->irq, host->irq_cd, host->dma);
 
+	ret = s3cmci_cpufreq_register(host);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register cpufreq\n");
+		goto free_dmabuf;
+	}
+
 	ret = mmc_add_host(mmc);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add mmc host.\n");
-		goto free_dmabuf;
+		goto free_cpufreq;
 	}
 
 	platform_set_drvdata(pdev, mmc);
@@ -1309,6 +1413,9 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
 
 	return 0;
 
+ free_cpufreq:
+	s3cmci_cpufreq_deregister(host);
+
  free_dmabuf:
 	clk_disable(host->clk);
 
@@ -1342,6 +1449,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
 	if (host->irq_cd >= 0)
 		free_irq(host->irq_cd, host);
 
+	s3cmci_cpufreq_deregister(host);
 	mmc_remove_host(mmc);
 	clk_disable(host->clk);
 }
@@ -1455,7 +1563,7 @@ module_exit(s3cmci_exit);
 
 MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver");
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Thomas Kleffel <tk@maintech.de>");
+MODULE_AUTHOR("Thomas Kleffel <tk@maintech.de>, Ben Dooks <ben-linux@fluff.org>");
 MODULE_ALIAS("platform:s3c2410-sdi");
 MODULE_ALIAS("platform:s3c2412-sdi");
 MODULE_ALIAS("platform:s3c2440-sdi");
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index 37d9c60010c..ca1ba3d58cf 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -51,7 +51,7 @@ struct s3cmci_host {
 	int			dma_complete;
 
 	u32			pio_sgptr;
-	u32			pio_words;
+	u32			pio_bytes;
 	u32			pio_count;
 	u32			*pio_ptr;
 #define XFER_NONE 0
@@ -67,4 +67,8 @@ struct s3cmci_host {
 
 	unsigned int		ccnt, dcnt;
 	struct tasklet_struct	pio_tasklet;
+
+#ifdef CONFIG_CPU_FREQ
+	struct notifier_block	freq_transition;
+#endif
 };
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 14f11f8b9e5..a90d50c2c3e 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -172,6 +172,11 @@ config MTD_CHAR
 	  memory chips, and also use ioctl() to obtain information about
 	  the device, or to erase parts of it.
 
+config HAVE_MTD_OTP
+	bool
+	help
+	  Enable access to OTP regions using MTD_CHAR.
+
 config MTD_BLKDEVS
 	tristate "Common interface to block layer for MTD 'translation layers'"
 	depends on BLOCK
diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 479d32b57a1..9408099eec4 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -6,6 +6,7 @@ menu "RAM/ROM/Flash chip drivers"
 config MTD_CFI
 	tristate "Detect flash chips by Common Flash Interface (CFI) probe"
 	select MTD_GEN_PROBE
+	select MTD_CFI_UTIL
 	help
 	  The Common Flash Interface specification was developed by Intel,
 	  AMD and other flash manufactures that provides a universal method
@@ -154,6 +155,7 @@ config MTD_CFI_I8
 config MTD_OTP
 	bool "Protection Registers aka one-time programmable (OTP) bits"
 	depends on MTD_CFI_ADV_OPTIONS
+	select HAVE_MTD_OTP
 	default n
 	help
 	  This enables support for reading, writing and locking so called
@@ -187,7 +189,7 @@ config MTD_CFI_INTELEXT
 	  StrataFlash and other parts.
 
 config MTD_CFI_AMDSTD
-	tristate "Support for AMD/Fujitsu flash chips"
+	tristate "Support for AMD/Fujitsu/Spansion flash chips"
 	depends on MTD_GEN_PROBE
 	select MTD_CFI_UTIL
 	help
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5f1b472137a..c93a8be5d5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
 		else
 			cfi->chips[i].erase_time = 2000000;
 
+		if (cfi->cfiq->WordWriteTimeoutTyp &&
+		    cfi->cfiq->WordWriteTimeoutMax)
+			cfi->chips[i].word_write_time_max =
+				1<<(cfi->cfiq->WordWriteTimeoutTyp +
+				    cfi->cfiq->WordWriteTimeoutMax);
+		else
+			cfi->chips[i].word_write_time_max = 50000 * 8;
+
+		if (cfi->cfiq->BufWriteTimeoutTyp &&
+		    cfi->cfiq->BufWriteTimeoutMax)
+			cfi->chips[i].buffer_write_time_max =
+				1<<(cfi->cfiq->BufWriteTimeoutTyp +
+				    cfi->cfiq->BufWriteTimeoutMax);
+
+		if (cfi->cfiq->BlockEraseTimeoutTyp &&
+		    cfi->cfiq->BlockEraseTimeoutMax)
+			cfi->chips[i].erase_time_max =
+				1000<<(cfi->cfiq->BlockEraseTimeoutTyp +
+				       cfi->cfiq->BlockEraseTimeoutMax);
+		else
+			cfi->chips[i].erase_time_max = 2000000 * 8;
+
 		cfi->chips[i].ref_point_counter = 0;
 		init_waitqueue_head(&(cfi->chips[i].wq));
 	}
@@ -703,6 +725,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
 	unsigned long timeo = jiffies + HZ;
 
+	/* Prevent setting state FL_SYNCING for chip in suspended state. */
+	if (mode == FL_SYNCING && chip->oldstate != FL_READY)
+		goto sleep;
+
 	switch (chip->state) {
 
 	case FL_STATUS:
@@ -808,8 +834,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 	DECLARE_WAITQUEUE(wait, current);
 
  retry:
-	if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
-			   || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
+	if (chip->priv &&
+	    (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE
+	    || mode == FL_SHUTDOWN) && chip->state != FL_SYNCING) {
 		/*
 		 * OK. We have possibility for contention on the write/erase
 		 * operations which are global to the real chip and not per
@@ -859,6 +886,14 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 				return ret;
 			}
 			spin_lock(&shared->lock);
+
+			/* We should not own chip if it is already
+			 * in FL_SYNCING state. Put contender and retry. */
+			if (chip->state == FL_SYNCING) {
+				put_chip(map, contender, contender->start);
+				spin_unlock(contender->mutex);
+				goto retry;
+			}
 			spin_unlock(contender->mutex);
 		}
 
@@ -1012,7 +1047,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, unsigned int chip_op_time )
+		unsigned long adr, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1021,7 +1056,7 @@ static int __xipram xip_wait_for_operation(
 	flstate_t oldstate, newstate;
 
        	start = xip_currtime();
-	usec = chip_op_time * 8;
+	usec = chip_op_time_max;
 	if (usec == 0)
 		usec = 500000;
 	done = 0;
@@ -1131,8 +1166,8 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
-	xip_wait_for_operation(map, chip, cmd_adr, usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
+	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
 
 #else
 
@@ -1144,7 +1179,7 @@ static int __xipram xip_wait_for_operation(
 static int inval_cache_and_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
 		unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
-		unsigned int chip_op_time)
+		unsigned int chip_op_time, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, status_OK = CMD(0x80);
@@ -1156,8 +1191,7 @@ static int inval_cache_and_wait_for_operation(
 		INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
 	spin_lock(chip->mutex);
 
-	/* set our timeout to 8 times the expected delay */
-	timeo = chip_op_time * 8;
+	timeo = chip_op_time_max;
 	if (!timeo)
 		timeo = 500000;
 	reset_timeo = timeo;
@@ -1217,8 +1251,8 @@ static int inval_cache_and_wait_for_operation(
 
 #endif
 
-#define WAIT_TIMEOUT(map, chip, adr, udelay) \
-	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay);
+#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \
+	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max)
 
 
 static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1452,7 +1486,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, map_bankwidth(map),
-				   chip->word_write_time);
+				   chip->word_write_time,
+				   chip->word_write_time_max);
 	if (ret) {
 		xip_enable(map, chip, adr);
 		printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1623,7 +1658,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	chip->state = FL_WRITING_TO_BUFFER;
 	map_write(map, write_cmd, cmd_adr);
-	ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0);
+	ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0);
 	if (ret) {
 		/* Argh. Not ready for write to buffer */
 		map_word Xstatus = map_read(map, cmd_adr);
@@ -1640,7 +1675,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	/* Figure out the number of words to write */
 	word_gap = (-adr & (map_bankwidth(map)-1));
-	words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map);
+	words = DIV_ROUND_UP(len - word_gap, map_bankwidth(map));
 	if (!word_gap) {
 		words--;
 	} else {
@@ -1692,7 +1727,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
 				   initial_adr, initial_len,
-				   chip->buffer_write_time);
+				   chip->buffer_write_time,
+				   chip->buffer_write_time_max);
 	if (ret) {
 		map_write(map, CMD(0x70), cmd_adr);
 		chip->state = FL_STATUS;
@@ -1827,7 +1863,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, len,
-				   chip->erase_time);
+				   chip->erase_time,
+				   chip->erase_time_max);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
@@ -2006,7 +2043,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
 	 */
 	udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0;
 
-	ret = WAIT_TIMEOUT(map, chip, adr, udelay);
+	ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a972cc6be43..3e6f5d8609e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -13,6 +13,8 @@
  * XIP support hooks by Vitaly Wool (based on code for Intel flash
  * by Nicolas Pitre)
  *
+ * 25/09/2008 Christopher Moore: TopBottom fixup for many Macronix with CFI V1.0
+ *
  * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com
  *
  * This code is GPL
@@ -43,6 +45,7 @@
 
 #define MANUFACTURER_AMD	0x0001
 #define MANUFACTURER_ATMEL	0x001F
+#define MANUFACTURER_MACRONIX	0x00C2
 #define MANUFACTURER_SST	0x00BF
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -144,12 +147,44 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
 
 	if (((major << 8) | minor) < 0x3131) {
 		/* CFI version 1.0 => don't trust bootloc */
+
+		DEBUG(MTD_DEBUG_LEVEL1,
+			"%s: JEDEC Vendor ID is 0x%02X Device ID is 0x%02X\n",
+			map->name, cfi->mfr, cfi->id);
+
+		/* AFAICS all 29LV400 with a bottom boot block have a device ID
+		 * of 0x22BA in 16-bit mode and 0xBA in 8-bit mode.
+		 * These were badly detected as they have the 0x80 bit set
+		 * so treat them as a special case.
+		 */
+		if (((cfi->id == 0xBA) || (cfi->id == 0x22BA)) &&
+
+			/* Macronix added CFI to their 2nd generation
+			 * MX29LV400C B/T but AFAICS no other 29LV400 (AMD,
+			 * Fujitsu, Spansion, EON, ESI and older Macronix)
+			 * has CFI.
+			 *
+			 * Therefore also check the manufacturer.
+			 * This reduces the risk of false detection due to
+			 * the 8-bit device ID.
+			 */
+			(cfi->mfr == MANUFACTURER_MACRONIX)) {
+			DEBUG(MTD_DEBUG_LEVEL1,
+				"%s: Macronix MX29LV400C with bottom boot block"
+				" detected\n", map->name);
+			extp->TopBottom = 2;	/* bottom boot */
+		} else
 		if (cfi->id & 0x80) {
 			printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id);
 			extp->TopBottom = 3;	/* top boot */
 		} else {
 			extp->TopBottom = 2;	/* bottom boot */
 		}
+
+		DEBUG(MTD_DEBUG_LEVEL1,
+			"%s: AMD CFI PRI V%c.%c has no boot block field;"
+			" deduced %s from Device ID\n", map->name, major, minor,
+			extp->TopBottom == 2 ? "bottom" : "top");
 	}
 }
 #endif
@@ -178,10 +213,18 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
 	if (atmel_pri.Features & 0x02)
 		extp->EraseSuspend = 2;
 
-	if (atmel_pri.BottomBoot)
-		extp->TopBottom = 2;
-	else
-		extp->TopBottom = 3;
+	/* Some chips got it backwards... */
+	if (cfi->id == AT49BV6416) {
+		if (atmel_pri.BottomBoot)
+			extp->TopBottom = 3;
+		else
+			extp->TopBottom = 2;
+	} else {
+		if (atmel_pri.BottomBoot)
+			extp->TopBottom = 2;
+		else
+			extp->TopBottom = 3;
+	}
 
 	/* burst write mode not supported */
 	cfi->cfiq->BufWriteTimeoutTyp = 0;
@@ -243,6 +286,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
 	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+	{ MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
 #endif
 	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
 	{ CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index c418e92e1d9..e63e6749429 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -44,17 +44,14 @@ do { \
 
 #define xip_enable(base, map, cfi) \
 do { \
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
+	cfi_qry_mode_off(base, map, cfi);		\
 	xip_allowed(base, map); \
 } while (0)
 
 #define xip_disable_qry(base, map, cfi) \
 do { \
 	xip_disable(); \
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \
+	cfi_qry_mode_on(base, map, cfi); \
 } while (0)
 
 #else
@@ -70,32 +67,6 @@ do { \
    in: interleave,type,mode
    ret: table index, <0 for error
  */
-static int __xipram qry_present(struct map_info *map, __u32 base,
-				struct cfi_private *cfi)
-{
-	int osf = cfi->interleave * cfi->device_type;	// scale factor
-	map_word val[3];
-	map_word qry[3];
-
-	qry[0] = cfi_build_cmd('Q', map, cfi);
-	qry[1] = cfi_build_cmd('R', map, cfi);
-	qry[2] = cfi_build_cmd('Y', map, cfi);
-
-	val[0] = map_read(map, base + osf*0x10);
-	val[1] = map_read(map, base + osf*0x11);
-	val[2] = map_read(map, base + osf*0x12);
-
-	if (!map_word_equal(map, qry[0], val[0]))
-		return 0;
-
-	if (!map_word_equal(map, qry[1], val[1]))
-		return 0;
-
-	if (!map_word_equal(map, qry[2], val[2]))
-		return 0;
-
-	return 1; 	// "QRY" found
-}
 
 static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 				   unsigned long *chip_map, struct cfi_private *cfi)
@@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	}
 
 	xip_disable();
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
-	if (!qry_present(map,base,cfi)) {
+	if (!cfi_qry_mode_on(base, map, cfi)) {
 		xip_enable(base, map, cfi);
 		return 0;
 	}
@@ -141,14 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
  		start = i << cfi->chipshift;
 		/* This chip should be in read mode if it's one
 		   we've already touched. */
-		if (qry_present(map, start, cfi)) {
+		if (cfi_qry_present(map, start, cfi)) {
 			/* Eep. This chip also had the QRY marker.
 			 * Is it an alias for the new one? */
-			cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL);
-			cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+			cfi_qry_mode_off(start, map, cfi);
 
 			/* If the QRY marker goes away, it's an alias */
-			if (!qry_present(map, start, cfi)) {
+			if (!cfi_qry_present(map, start, cfi)) {
 				xip_allowed(base, map);
 				printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
 				       map->name, base, start);
@@ -158,10 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 			 * unfortunate. Stick the new chip in read mode
 			 * too and if it's the same, assume it's an alias. */
 			/* FIXME: Use other modes to do a proper check */
-			cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-			cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+			cfi_qry_mode_off(base, map, cfi);
 
-			if (qry_present(map, base, cfi)) {
+			if (cfi_qry_present(map, base, cfi)) {
 				xip_allowed(base, map);
 				printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
 				       map->name, base, start);
@@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
 	cfi->numchips++;
 
 	/* Put it back into Read Mode */
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
 			  cfi_read_query(map, base + 0xf * ofs_factor);
 
 	/* Put it back into Read Mode */
-	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-	/* ... even if it's an Intel chip */
-	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_qry_mode_off(base, map, cfi);
 	xip_allowed(base, map);
 
 	/* Do any necessary byteswapping */
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 0ee45701801..34d40e25d31 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -24,6 +24,66 @@
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/compatmac.h>
 
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+			     struct cfi_private *cfi)
+{
+	int osf = cfi->interleave * cfi->device_type;	/* scale factor */
+	map_word val[3];
+	map_word qry[3];
+
+	qry[0] = cfi_build_cmd('Q', map, cfi);
+	qry[1] = cfi_build_cmd('R', map, cfi);
+	qry[2] = cfi_build_cmd('Y', map, cfi);
+
+	val[0] = map_read(map, base + osf*0x10);
+	val[1] = map_read(map, base + osf*0x11);
+	val[2] = map_read(map, base + osf*0x12);
+
+	if (!map_word_equal(map, qry[0], val[0]))
+		return 0;
+
+	if (!map_word_equal(map, qry[1], val[1]))
+		return 0;
+
+	if (!map_word_equal(map, qry[2], val[2]))
+		return 0;
+
+	return 1; 	/* "QRY" found */
+}
+EXPORT_SYMBOL_GPL(cfi_qry_present);
+
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+			     struct cfi_private *cfi)
+{
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+	if (cfi_qry_present(map, base, cfi))
+		return 1;
+	/* QRY not found probably we deal with some odd CFI chips */
+	/* Some revisions of some old Intel chips? */
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+	if (cfi_qry_present(map, base, cfi))
+		return 1;
+	/* ST M29DW chips */
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
+	if (cfi_qry_present(map, base, cfi))
+		return 1;
+	/* QRY not found */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_on);
+
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+			       struct cfi_private *cfi)
+{
+	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
+
 struct cfi_extquery *
 __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
 {
@@ -48,8 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 #endif
 
 	/* Switch it into Query Mode */
-	cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
+	cfi_qry_mode_on(base, map, cfi);
 	/* Read in the Extended Query Table */
 	for (i=0; i<size; i++) {
 		((unsigned char *)extp)[i] =
@@ -57,8 +116,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 	}
 
 	/* Make sure it returns to read mode */
-	cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL);
-	cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL);
+	cfi_qry_mode_off(base, map, cfi);
 
 #ifdef CONFIG_MTD_XIP
 	(void) map_read(map, base);
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index f061885b281..e2dc96441e0 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -111,7 +111,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
 		max_chips = 1;
 	}
 
-	mapsize = sizeof(long) * ( (max_chips + BITS_PER_LONG-1) / BITS_PER_LONG );
+	mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);
 	chip_map = kzalloc(mapsize, GFP_KERNEL);
 	if (!chip_map) {
 		printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name);
diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index 71bc07f149b..50a340388e7 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -7,6 +7,7 @@
  *
  * mtdparts=<mtddef>[;<mtddef]
  * <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+ *              where <mtd-id> is the name from the "cat /proc/mtd" command
  * <partdef> := <size>[@offset][<name>][ro][lk]
  * <mtd-id>  := unique name used in mapping driver/device (mtd->name)
  * <size>    := standard linux memsize OR "-" to denote all remaining space
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 9c613f06623..6fde0a2e356 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -59,6 +59,27 @@ config MTD_DATAFLASH
 	  Sometimes DataFlash chips are packaged inside MMC-format
 	  cards; at this writing, the MMC stack won't handle those.
 
+config MTD_DATAFLASH_WRITE_VERIFY
+	bool "Verify DataFlash page writes"
+	depends on MTD_DATAFLASH
+	help
+	  This adds an extra check when data is written to the flash.
+	  It may help if you are verifying chip setup (timings etc) on
+	  your board.  There is a rare possibility that even though the
+	  device thinks the write was successful, a bit could have been
+	  flipped accidentally due to device wear or something else.
+
+config MTD_DATAFLASH_OTP
+	bool "DataFlash OTP support (Security Register)"
+	depends on MTD_DATAFLASH
+	select HAVE_MTD_OTP
+	help
+	  Newer DataFlash chips (revisions C and D) support 128 bytes of
+	  one-time-programmable (OTP) data.  The first half may be written
+	  (once) with up to 64 bytes of data, such as a serial number or
+	  other key product data.  The second half is programmed with a
+	  unique-to-each-chip bit pattern at the factory.
+
 config MTD_M25P80
 	tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
 	depends on SPI_MASTER && EXPERIMENTAL
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index b35c3333e21..76a76751da3 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -39,6 +39,7 @@
 #define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
 #define	OPCODE_BE_4K 		0x20	/* Erase 4KiB block */
 #define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define	OPCODE_BE		0xc7	/* Bulk erase (whole flash chip) */
 #define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
 
@@ -161,6 +162,31 @@ static int wait_till_ready(struct m25p *flash)
 	return 1;
 }
 
+/*
+ * Erase the whole flash memory
+ *
+ * Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_block(struct m25p *flash)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
+			flash->spi->dev.bus_id, __func__,
+			flash->mtd.size / 1024);
+
+	/* Wait until finished previous write command. */
+	if (wait_till_ready(flash))
+		return 1;
+
+	/* Send write enable, then erase commands. */
+	write_enable(flash);
+
+	/* Set up command buffer. */
+	flash->command[0] = OPCODE_BE;
+
+	spi_write(flash->spi, flash->command, 1);
+
+	return 0;
+}
 
 /*
  * Erase one sector of flash memory at offset ``offset'' which is any
@@ -229,15 +255,24 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
 	 */
 
 	/* now erase those sectors */
-	while (len) {
-		if (erase_sector(flash, addr)) {
-			instr->state = MTD_ERASE_FAILED;
-			mutex_unlock(&flash->lock);
-			return -EIO;
-		}
+	if (len == flash->mtd.size) {
+		/* whole chip: one bulk erase, no per-sector pass afterwards */
+		if (erase_block(flash)) {
+			instr->state = MTD_ERASE_FAILED;
+			mutex_unlock(&flash->lock);
+			return -EIO;
+		}
+	} else {
+		while (len) {
+			if (erase_sector(flash, addr)) {
+				instr->state = MTD_ERASE_FAILED;
+				mutex_unlock(&flash->lock);
+				return -EIO;
+			}
 
-		addr += mtd->erasesize;
-		len -= mtd->erasesize;
+			addr += mtd->erasesize;
+			len -= mtd->erasesize;
+		}
 	}
 
 	mutex_unlock(&flash->lock);
@@ -437,6 +469,7 @@ struct flash_info {
 	 * then a two byte device id.
 	 */
 	u32		jedec_id;
+	u16             ext_id;
 
 	/* The size listed here is what works with OPCODE_SE, which isn't
 	 * necessarily called a "sector" by the vendor.
@@ -456,72 +489,75 @@ struct flash_info {
 static struct flash_info __devinitdata m25p_data [] = {
 
 	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
-	{ "at25fs010",  0x1f6601, 32 * 1024, 4, SECT_4K, },
-	{ "at25fs040",  0x1f6604, 64 * 1024, 8, SECT_4K, },
+	{ "at25fs010",  0x1f6601, 0, 32 * 1024, 4, SECT_4K, },
+	{ "at25fs040",  0x1f6604, 0, 64 * 1024, 8, SECT_4K, },
 
-	{ "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, },
-	{ "at25df641",  0x1f4800, 64 * 1024, 128, SECT_4K, },
+	{ "at25df041a", 0x1f4401, 0, 64 * 1024, 8, SECT_4K, },
+	{ "at25df641",  0x1f4800, 0, 64 * 1024, 128, SECT_4K, },
 
-	{ "at26f004",   0x1f0400, 64 * 1024, 8, SECT_4K, },
-	{ "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, },
-	{ "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, },
-	{ "at26df321",  0x1f4701, 64 * 1024, 64, SECT_4K, },
+	{ "at26f004",   0x1f0400, 0, 64 * 1024, 8, SECT_4K, },
+	{ "at26df081a", 0x1f4501, 0, 64 * 1024, 16, SECT_4K, },
+	{ "at26df161a", 0x1f4601, 0, 64 * 1024, 32, SECT_4K, },
+	{ "at26df321",  0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
 
 	/* Spansion -- single (large) sector size only, at least
 	 * for the chips listed here (without boot sectors).
 	 */
-	{ "s25sl004a", 0x010212, 64 * 1024, 8, },
-	{ "s25sl008a", 0x010213, 64 * 1024, 16, },
-	{ "s25sl016a", 0x010214, 64 * 1024, 32, },
-	{ "s25sl032a", 0x010215, 64 * 1024, 64, },
-	{ "s25sl064a", 0x010216, 64 * 1024, 128, },
+	{ "s25sl004a", 0x010212, 0, 64 * 1024, 8, },
+	{ "s25sl008a", 0x010213, 0, 64 * 1024, 16, },
+	{ "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
+	{ "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
+	{ "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
+	{ "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
+	{ "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
-	{ "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, },
-	{ "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, },
-	{ "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, },
-	{ "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, },
+	{ "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
+	{ "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
+	{ "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
+	{ "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
 
 	/* ST Microelectronics -- newer production may have feature updates */
-	{ "m25p05",  0x202010,  32 * 1024, 2, },
-	{ "m25p10",  0x202011,  32 * 1024, 4, },
-	{ "m25p20",  0x202012,  64 * 1024, 4, },
-	{ "m25p40",  0x202013,  64 * 1024, 8, },
-	{ "m25p80",         0,  64 * 1024, 16, },
-	{ "m25p16",  0x202015,  64 * 1024, 32, },
-	{ "m25p32",  0x202016,  64 * 1024, 64, },
-	{ "m25p64",  0x202017,  64 * 1024, 128, },
-	{ "m25p128", 0x202018, 256 * 1024, 64, },
-
-	{ "m45pe80", 0x204014,  64 * 1024, 16, },
-	{ "m45pe16", 0x204015,  64 * 1024, 32, },
-
-	{ "m25pe80", 0x208014,  64 * 1024, 16, },
-	{ "m25pe16", 0x208015,  64 * 1024, 32, SECT_4K, },
+	{ "m25p05",  0x202010,  0, 32 * 1024, 2, },
+	{ "m25p10",  0x202011,  0, 32 * 1024, 4, },
+	{ "m25p20",  0x202012,  0, 64 * 1024, 4, },
+	{ "m25p40",  0x202013,  0, 64 * 1024, 8, },
+	{ "m25p80",         0,  0, 64 * 1024, 16, },
+	{ "m25p16",  0x202015,  0, 64 * 1024, 32, },
+	{ "m25p32",  0x202016,  0, 64 * 1024, 64, },
+	{ "m25p64",  0x202017,  0, 64 * 1024, 128, },
+	{ "m25p128", 0x202018, 0, 256 * 1024, 64, },
+
+	{ "m45pe80", 0x204014,  0, 64 * 1024, 16, },
+	{ "m45pe16", 0x204015,  0, 64 * 1024, 32, },
+
+	{ "m25pe80", 0x208014,  0, 64 * 1024, 16, },
+	{ "m25pe16", 0x208015,  0, 64 * 1024, 32, SECT_4K, },
 
 	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
-	{ "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, },
-	{ "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, },
-	{ "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, },
-	{ "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, },
-	{ "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, },
-	{ "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, },
-	{ "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, },
+	{ "w25x10", 0xef3011, 0, 64 * 1024, 2, SECT_4K, },
+	{ "w25x20", 0xef3012, 0, 64 * 1024, 4, SECT_4K, },
+	{ "w25x40", 0xef3013, 0, 64 * 1024, 8, SECT_4K, },
+	{ "w25x80", 0xef3014, 0, 64 * 1024, 16, SECT_4K, },
+	{ "w25x16", 0xef3015, 0, 64 * 1024, 32, SECT_4K, },
+	{ "w25x32", 0xef3016, 0, 64 * 1024, 64, SECT_4K, },
+	{ "w25x64", 0xef3017, 0, 64 * 1024, 128, SECT_4K, },
 };
 
 static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 {
 	int			tmp;
 	u8			code = OPCODE_RDID;
-	u8			id[3];
+	u8			id[5];
 	u32			jedec;
+	u16                     ext_jedec;
 	struct flash_info	*info;
 
 	/* JEDEC also defines an optional "extended device information"
 	 * string for after vendor-specific data, after the three bytes
 	 * we use here.  Supporting some chips might require using it.
 	 */
-	tmp = spi_write_then_read(spi, &code, 1, id, 3);
+	tmp = spi_write_then_read(spi, &code, 1, id, 5);
 	if (tmp < 0) {
 		DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
 			spi->dev.bus_id, tmp);
@@ -533,10 +569,17 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	jedec = jedec << 8;
 	jedec |= id[2];
 
+	ext_jedec = id[3] << 8 | id[4];
+
 	for (tmp = 0, info = m25p_data;
 			tmp < ARRAY_SIZE(m25p_data);
 			tmp++, info++) {
-		if (info->jedec_id == jedec)
+		if (info->jedec_id == jedec) {
+			/* only table entries that carry an extended ID
+			 * are matched against the extra two RDID bytes */
+			if (info->ext_id != 0 && info->ext_id != ext_jedec)
+				continue;
 			return info;
+		}
 	}
 	dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 8bd0dea6885..6dd9aff8bb2 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -30,12 +30,10 @@
  * doesn't (yet) use these for any kind of i/o overlap or prefetching.
  *
  * Sometimes DataFlash is packaged in MMC-format cards, although the
- * MMC stack can't use SPI (yet), or distinguish between MMC and DataFlash
+ * MMC stack can't (yet?) distinguish between MMC and DataFlash
  * protocols during enumeration.
  */
 
-#define CONFIG_DATAFLASH_WRITE_VERIFY
-
 /* reads can bypass the buffers */
 #define OP_READ_CONTINUOUS	0xE8
 #define OP_READ_PAGE		0xD2
@@ -80,7 +78,8 @@
  */
 #define OP_READ_ID		0x9F
 #define OP_READ_SECURITY	0x77
-#define OP_WRITE_SECURITY	0x9A	/* OTP bits */
+#define OP_WRITE_SECURITY_REVC	0x9A
+#define OP_WRITE_SECURITY	0x9B	/* revision D */
 
 
 struct dataflash {
@@ -402,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		(void) dataflash_waitready(priv->spi);
 
 
-#ifdef	CONFIG_DATAFLASH_WRITE_VERIFY
+#ifdef CONFIG_MTD_DATAFLASH_VERIFY_WRITE
 
 		/* (3) Compare to Buffer1 */
 		addr = pageaddr << priv->page_offset;
@@ -431,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		} else
 			status = 0;
 
-#endif	/* CONFIG_DATAFLASH_WRITE_VERIFY */
+#endif	/* CONFIG_MTD_DATAFLASH_VERIFY_WRITE */
 
 		remaining = remaining - writelen;
 		pageaddr++;
@@ -451,16 +450,192 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 /* ......................................................................... */
 
+#ifdef CONFIG_MTD_DATAFLASH_OTP
+
+static int dataflash_get_otp_info(struct mtd_info *mtd,
+		struct otp_info *info, size_t len)
+{
+	/* Report both blocks as identical:  bytes 0..63, locked.
+	 * Unless the user block changed from all-ones, we can't
+	 * tell whether it's still writable; so we assume it isn't.
+	 */
+	info->start = 0;
+	info->length = 64;
+	info->locked = 1;
+	return sizeof(*info);
+}
+
+static ssize_t otp_read(struct spi_device *spi, unsigned base,
+		uint8_t *buf, loff_t off, size_t len)
+{
+	struct spi_message	m;
+	size_t			l;
+	uint8_t			*scratch;
+	struct spi_transfer	t;
+	int			status;
+
+	if (off > 64)
+		return -EINVAL;
+
+	if ((off + len) > 64)
+		len = 64 - off;
+	if (len == 0)
+		return len;
+
+	spi_message_init(&m);
+
+	l = 4 + base + off + len;
+	scratch = kzalloc(l, GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+
+	/* OUT: OP_READ_SECURITY, 3 don't-care bytes, zeroes
+	 * IN:  ignore 4 bytes, data bytes 0..N (max 127)
+	 */
+	scratch[0] = OP_READ_SECURITY;
+
+	memset(&t, 0, sizeof t);
+	t.tx_buf = scratch;
+	t.rx_buf = scratch;
+	t.len = l;
+	spi_message_add_tail(&t, &m);
+
+	dataflash_waitready(spi);
+
+	status = spi_sync(spi, &m);
+	if (status >= 0) {
+		memcpy(buf, scratch + 4 + base + off, len);
+		status = len;
+	}
+
+	kfree(scratch);
+	return status;
+}
+
+static int dataflash_read_fact_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	/* 64 bytes, from 0..63 ... start at 64 on-chip */
+	mutex_lock(&priv->lock);
+	status = otp_read(priv->spi, 64, buf, from, len);
+	mutex_unlock(&priv->lock);
+
+	if (status < 0)
+		return status;
+	*retlen = status;
+	return 0;
+}
+
+static int dataflash_read_user_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	/* 64 bytes, from 0..63 ... start at 0 on-chip */
+	mutex_lock(&priv->lock);
+	status = otp_read(priv->spi, 0, buf, from, len);
+	mutex_unlock(&priv->lock);
+
+	if (status < 0)
+		return status;
+	*retlen = status;
+	return 0;
+}
+
+static int dataflash_write_user_otp(struct mtd_info *mtd,
+		loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct spi_message	m;
+	const size_t		l = 4 + 64;
+	uint8_t			*scratch;
+	struct spi_transfer	t;
+	struct dataflash	*priv = (struct dataflash *)mtd->priv;
+	int			status;
+
+	if (len > 64)
+		return -EINVAL;
+
+	/* Strictly speaking, we *could* truncate the write ... but
+	 * let's not do that for the only write that's ever possible.
+	 */
+	if ((from + len) > 64)
+		return -EINVAL;
+
+	/* OUT: OP_WRITE_SECURITY, 3 zeroes, 64 data-or-zero bytes
+	 * IN:  ignore all
+	 */
+	scratch = kzalloc(l, GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+	scratch[0] = OP_WRITE_SECURITY;
+	memcpy(scratch + 4 + from, buf, len);
+
+	spi_message_init(&m);
+
+	memset(&t, 0, sizeof t);
+	t.tx_buf = scratch;
+	t.len = l;
+	spi_message_add_tail(&t, &m);
+
+	/* Write the OTP bits, if they've not yet been written.
+	 * This modifies SRAM buffer1.
+	 */
+	mutex_lock(&priv->lock);
+	dataflash_waitready(priv->spi);
+	status = spi_sync(priv->spi, &m);
+	mutex_unlock(&priv->lock);
+
+	kfree(scratch);
+
+	if (status >= 0) {
+		status = 0;
+		*retlen = len;
+	}
+	return status;
+}
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+	device->get_fact_prot_info = dataflash_get_otp_info;
+	device->read_fact_prot_reg = dataflash_read_fact_otp;
+	device->get_user_prot_info = dataflash_get_otp_info;
+	device->read_user_prot_reg = dataflash_read_user_otp;
+
+	/* rev c parts (at45db321c and at45db1281 only!) use a
+	 * different write procedure; not (yet?) implemented.
+	 */
+	if (revision > 'c')
+		device->write_user_prot_reg = dataflash_write_user_otp;
+
+	return ", OTP";
+}
+
+#else
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+	return " (OTP)";
+}
+
+#endif
+
+/* ......................................................................... */
+
 /*
  * Register DataFlash device with MTD subsystem.
  */
 static int __devinit
-add_dataflash(struct spi_device *spi, char *name,
-		int nr_pages, int pagesize, int pageoffset)
+add_dataflash_otp(struct spi_device *spi, char *name,
+		int nr_pages, int pagesize, int pageoffset, char revision)
 {
 	struct dataflash		*priv;
 	struct mtd_info			*device;
 	struct flash_platform_data	*pdata = spi->dev.platform_data;
+	char				*otp_tag = "";
 
 	priv = kzalloc(sizeof *priv, GFP_KERNEL);
 	if (!priv)
@@ -489,8 +664,12 @@ add_dataflash(struct spi_device *spi, char *name,
 	device->write = dataflash_write;
 	device->priv = priv;
 
-	dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes\n",
-			name, DIV_ROUND_UP(device->size, 1024), pagesize);
+	if (revision >= 'c')
+		otp_tag = otp_setup(device, revision);
+
+	dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n",
+			name, DIV_ROUND_UP(device->size, 1024),
+			pagesize, otp_tag);
 	dev_set_drvdata(&spi->dev, priv);
 
 	if (mtd_has_partitions()) {
@@ -519,6 +698,14 @@ add_dataflash(struct spi_device *spi, char *name,
 	return add_mtd_device(device) == 1 ? -ENODEV : 0;
 }
 
+static inline int __devinit
+add_dataflash(struct spi_device *spi, char *name,
+		int nr_pages, int pagesize, int pageoffset)
+{
+	return add_dataflash_otp(spi, name, nr_pages, pagesize,
+			pageoffset, 0);
+}
+
 struct flash_info {
 	char		*name;
 
@@ -664,13 +851,16 @@ static int __devinit dataflash_probe(struct spi_device *spi)
 	 * Try to detect dataflash by JEDEC ID.
 	 * If it succeeds we know we have either a C or D part.
 	 * D will support power of 2 pagesize option.
+	 * Both support the security register, though with different
+	 * write procedures.
 	 */
 	info = jedec_probe(spi);
 	if (IS_ERR(info))
 		return PTR_ERR(info);
 	if (info != NULL)
-		return add_dataflash(spi, info->name, info->nr_pages,
-				 info->pagesize, info->pageoffset);
+		return add_dataflash_otp(spi, info->name, info->nr_pages,
+				info->pagesize, info->pageoffset,
+				(info->flags & SUP_POW2PS) ? 'd' : 'c');
 
 	/*
 	 * Older chips support only legacy commands, identifing
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index c4f9d3378b2..50ce13887f6 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -388,6 +388,10 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		if (thisEUN == targetEUN)
 			break;
 
+		/* Unlink the last block from the chain. */
+		inftl->PUtable[prevEUN] = BLOCK_NIL;
+
+		/* Now try to erase it. */
 		if (INFTL_formatblock(inftl, thisEUN) < 0) {
 			/*
 			 * Could not erase : mark block as reserved.
@@ -396,7 +400,6 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		} else {
 			/* Correctly erased : mark it as free */
 			inftl->PUtable[thisEUN] = BLOCK_FREE;
-			inftl->PUtable[prevEUN] = BLOCK_NIL;
 			inftl->numfreeEUNs++;
 		}
 	}
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index df8e00bba07..5ea16936216 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -332,30 +332,6 @@ config MTD_CFI_FLAGADM
 	  Mapping for the Flaga digital module. If you don't have one, ignore
 	  this setting.
 
-config MTD_WALNUT
-	tristate "Flash device mapped on IBM 405GP Walnut"
-	depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 405GP
-	  Walnut board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
-config MTD_EBONY
-	tristate "Flash devices mapped on IBM 440GP Ebony"
-	depends on MTD_JEDECPROBE && EBONY && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 440GP
-	  Ebony board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
-config MTD_OCOTEA
-	tristate "Flash devices mapped on IBM 440GX Ocotea"
-	depends on MTD_CFI && OCOTEA && !PPC_MERGE
-	help
-	  This enables access routines for the flash chips on the IBM 440GX
-	  Ocotea board. If you have one of these boards and would like to
-	  use the flash chips on it, say 'Y'.
-
 config MTD_REDWOOD
 	tristate "CFI Flash devices mapped on IBM Redwood"
 	depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
@@ -458,13 +434,6 @@ config MTD_CEIVA
 	  PhotoMax Digital Picture Frame.
 	  If you have such a device, say 'Y'.
 
-config MTD_NOR_TOTO
-	tristate "NOR Flash device on TOTO board"
-	depends on ARCH_OMAP && OMAP_TOTO
-	help
-	  This enables access to the NOR flash on the Texas Instruments
-	  TOTO board.
-
 config MTD_H720X
 	tristate "Hynix evaluation board mappings"
 	depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 )
@@ -522,7 +491,7 @@ config MTD_BFIN_ASYNC
 
 config MTD_UCLINUX
 	tristate "Generic uClinux RAM/ROM filesystem support"
-	depends on MTD_PARTITIONS && !MMU
+	depends on MTD_PARTITIONS && MTD_RAM && !MMU
 	help
 	  Map driver to support image based filesystems for uClinux.
 
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 6cda6df973e..6d9ba35caf1 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -50,12 +50,8 @@ obj-$(CONFIG_MTD_REDWOOD)	+= redwood.o
 obj-$(CONFIG_MTD_UCLINUX)	+= uclinux.o
 obj-$(CONFIG_MTD_NETtel)	+= nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)	+= scb2_flash.o
-obj-$(CONFIG_MTD_EBONY)		+= ebony.o
-obj-$(CONFIG_MTD_OCOTEA)	+= ocotea.o
-obj-$(CONFIG_MTD_WALNUT)        += walnut.o
 obj-$(CONFIG_MTD_H720X)		+= h720x-flash.o
 obj-$(CONFIG_MTD_SBC8240)	+= sbc8240.o
-obj-$(CONFIG_MTD_NOR_TOTO)	+= omap-toto-flash.o
 obj-$(CONFIG_MTD_IXP4XX)	+= ixp4xx.o
 obj-$(CONFIG_MTD_IXP2000)	+= ixp2000.o
 obj-$(CONFIG_MTD_WRSBC8260)	+= wr_sbc82xx_flash.o
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
deleted file mode 100644
index d92b7c70d3e..00000000000
--- a/drivers/mtd/maps/ebony.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Mapping for Ebony user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ebony.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ebony_small_map = {
-	.name =		"Ebony small flash",
-	.size =		EBONY_SMALL_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-static struct map_info ebony_large_map = {
-	.name =		"Ebony large flash",
-	.size =		EBONY_LARGE_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-static struct mtd_partition ebony_small_partitions[] = {
-	{
-		.name =   "OpenBIOS",
-		.offset = 0x0,
-		.size =   0x80000,
-	}
-};
-
-static struct mtd_partition ebony_large_partitions[] = {
-	{
-		.name =   "fs",
-		.offset = 0,
-		.size =   0x380000,
-	},
-	{
-		.name =   "firmware",
-		.offset = 0x380000,
-		.size =   0x80000,
-	}
-};
-
-int __init init_ebony(void)
-{
-	u8 fpga0_reg;
-	u8 __iomem *fpga0_adr;
-	unsigned long long small_flash_base, large_flash_base;
-
-	fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16);
-	if (!fpga0_adr)
-		return -ENOMEM;
-
-	fpga0_reg = readb(fpga0_adr);
-	iounmap(fpga0_adr);
-
-	if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_HIGH2;
-	else if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_HIGH1;
-	else if (!EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_FLASH_SEL(fpga0_reg))
-		small_flash_base = EBONY_SMALL_FLASH_LOW2;
-	else
-		small_flash_base = EBONY_SMALL_FLASH_LOW1;
-
-	if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-			!EBONY_ONBRD_FLASH_EN(fpga0_reg))
-		large_flash_base = EBONY_LARGE_FLASH_LOW;
-	else
-		large_flash_base = EBONY_LARGE_FLASH_HIGH;
-
-	ebony_small_map.phys = small_flash_base;
-	ebony_small_map.virt = ioremap64(small_flash_base,
-					 ebony_small_map.size);
-
-	if (!ebony_small_map.virt) {
-		printk("Failed to ioremap flash\n");
-		return -EIO;
-	}
-
-	simple_map_init(&ebony_small_map);
-
-	flash = do_map_probe("jedec_probe", &ebony_small_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ebony_small_partitions,
-					ARRAY_SIZE(ebony_small_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ebony_small_map.virt);
-		return -ENXIO;
-	}
-
-	ebony_large_map.phys = large_flash_base;
-	ebony_large_map.virt = ioremap64(large_flash_base,
-					 ebony_large_map.size);
-
-	if (!ebony_large_map.virt) {
-		printk("Failed to ioremap flash\n");
-		iounmap(ebony_small_map.virt);
-		return -EIO;
-	}
-
-	simple_map_init(&ebony_large_map);
-
-	flash = do_map_probe("jedec_probe", &ebony_large_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ebony_large_partitions,
-					ARRAY_SIZE(ebony_large_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ebony_small_map.virt);
-		iounmap(ebony_large_map.virt);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static void __exit cleanup_ebony(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (ebony_small_map.virt) {
-		iounmap(ebony_small_map.virt);
-		ebony_small_map.virt = NULL;
-	}
-
-	if (ebony_large_map.virt) {
-		iounmap(ebony_large_map.virt);
-		ebony_large_map.virt = NULL;
-	}
-}
-
-module_init(init_ebony);
-module_exit(cleanup_ebony);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards");
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
deleted file mode 100644
index 5522eac8c98..00000000000
--- a/drivers/mtd/maps/ocotea.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Mapping for Ocotea user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ocotea.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ocotea_small_map = {
-	.name =		"Ocotea small flash",
-	.size =		OCOTEA_SMALL_FLASH_SIZE,
-	.buswidth =	1,
-};
-
-static struct map_info ocotea_large_map = {
-	.name =		"Ocotea large flash",
-	.size =		OCOTEA_LARGE_FLASH_SIZE,
-	.buswidth =	1,
-};
-
-static struct mtd_partition ocotea_small_partitions[] = {
-	{
-		.name =   "pibs",
-		.offset = 0x0,
-		.size =   0x100000,
-	}
-};
-
-static struct mtd_partition ocotea_large_partitions[] = {
-	{
-		.name =   "fs",
-		.offset = 0,
-		.size =   0x300000,
-	},
-	{
-		.name =   "firmware",
-		.offset = 0x300000,
-		.size =   0x100000,
-	}
-};
-
-int __init init_ocotea(void)
-{
-	u8 fpga0_reg;
-	u8 *fpga0_adr;
-	unsigned long long small_flash_base, large_flash_base;
-
-	fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16);
-	if (!fpga0_adr)
-		return -ENOMEM;
-
-	fpga0_reg = readb((unsigned long)fpga0_adr);
-	iounmap(fpga0_adr);
-
-	if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) {
-		small_flash_base = OCOTEA_SMALL_FLASH_HIGH;
-		large_flash_base = OCOTEA_LARGE_FLASH_LOW;
-	}
-	else {
-		small_flash_base = OCOTEA_SMALL_FLASH_LOW;
-		large_flash_base = OCOTEA_LARGE_FLASH_HIGH;
-	}
-
-	ocotea_small_map.phys = small_flash_base;
-	ocotea_small_map.virt = ioremap64(small_flash_base,
-					 ocotea_small_map.size);
-
-	if (!ocotea_small_map.virt) {
-		printk("Failed to ioremap flash\n");
-		return -EIO;
-	}
-
-	simple_map_init(&ocotea_small_map);
-
-	flash = do_map_probe("map_rom", &ocotea_small_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ocotea_small_partitions,
-					ARRAY_SIZE(ocotea_small_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ocotea_small_map.virt);
-		return -ENXIO;
-	}
-
-	ocotea_large_map.phys = large_flash_base;
-	ocotea_large_map.virt = ioremap64(large_flash_base,
-					 ocotea_large_map.size);
-
-	if (!ocotea_large_map.virt) {
-		printk("Failed to ioremap flash\n");
-		iounmap(ocotea_small_map.virt);
-		return -EIO;
-	}
-
-	simple_map_init(&ocotea_large_map);
-
-	flash = do_map_probe("cfi_probe", &ocotea_large_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, ocotea_large_partitions,
-					ARRAY_SIZE(ocotea_large_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(ocotea_small_map.virt);
-		iounmap(ocotea_large_map.virt);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static void __exit cleanup_ocotea(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (ocotea_small_map.virt) {
-		iounmap((void *)ocotea_small_map.virt);
-		ocotea_small_map.virt = 0;
-	}
-
-	if (ocotea_large_map.virt) {
-		iounmap((void *)ocotea_large_map.virt);
-		ocotea_large_map.virt = 0;
-	}
-}
-
-module_init(init_ocotea);
-module_exit(cleanup_ocotea);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards");
diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c
deleted file mode 100644
index 0a60ebbc217..00000000000
--- a/drivers/mtd/maps/omap-toto-flash.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * NOR Flash memory access on TI Toto board
- *
- * jzhang@ti.com (C) 2003 Texas Instruments.
- *
- *  (C) 2002 MontVista Software, Inc.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/hardware.h>
-#include <asm/io.h>
-
-
-#ifndef CONFIG_ARCH_OMAP
-#error This is for OMAP architecture only
-#endif
-
-//these lines need be moved to a hardware header file
-#define OMAP_TOTO_FLASH_BASE 0xd8000000
-#define OMAP_TOTO_FLASH_SIZE 0x80000
-
-static struct map_info omap_toto_map_flash = {
-	.name =		"OMAP Toto flash",
-	.bankwidth =	2,
-	.virt =		(void __iomem *)OMAP_TOTO_FLASH_BASE,
-};
-
-
-static struct mtd_partition toto_flash_partitions[] = {
-	{
-		.name =		"BootLoader",
-		.size =		0x00040000,     /* hopefully u-boot will stay 128k + 128*/
-		.offset =	0,
-		.mask_flags =	MTD_WRITEABLE,  /* force read-only */
-	}, {
-		.name =		"ReservedSpace",
-		.size =		0x00030000,
-		.offset =	MTDPART_OFS_APPEND,
-		//mask_flags:	MTD_WRITEABLE,  /* force read-only */
-	}, {
-		.name =		"EnvArea",      /* bottom 64KiB for env vars */
-		.size =		MTDPART_SIZ_FULL,
-		.offset =	MTDPART_OFS_APPEND,
-	}
-};
-
-static struct mtd_partition *parsed_parts;
-
-static struct mtd_info *flash_mtd;
-
-static int __init init_flash (void)
-{
-
-	struct mtd_partition *parts;
-	int nb_parts = 0;
-	int parsed_nr_parts = 0;
-	const char *part_type;
-
-	/*
-	 * Static partition definition selection
-	 */
-	part_type = "static";
-
- 	parts = toto_flash_partitions;
-	nb_parts = ARRAY_SIZE(toto_flash_partitions);
-	omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE;
-	omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE);
-
-	simple_map_init(&omap_toto_map_flash);
-	/*
-	 * Now let's probe for the actual flash.  Do it here since
-	 * specific machine settings might have been set above.
-	 */
-	printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n",
-		omap_toto_map_flash.bankwidth*8);
-	flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash);
-	if (!flash_mtd)
-		return -ENXIO;
-
- 	if (parsed_nr_parts > 0) {
-		parts = parsed_parts;
-		nb_parts = parsed_nr_parts;
-	}
-
-	if (nb_parts == 0) {
-		printk(KERN_NOTICE "OMAP toto flash: no partition info available,"
-			"registering whole flash at once\n");
-		if (add_mtd_device(flash_mtd)){
-            return -ENXIO;
-        }
-	} else {
-		printk(KERN_NOTICE "Using %s partition definition\n",
-			part_type);
-		return add_mtd_partitions(flash_mtd, parts, nb_parts);
-	}
-	return 0;
-}
-
-int __init omap_toto_mtd_init(void)
-{
-	int status;
-
- 	if (status = init_flash()) {
-		printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n");
-	}
-    return status;
-}
-
-static void  __exit omap_toto_mtd_cleanup(void)
-{
-	if (flash_mtd) {
-		del_mtd_partitions(flash_mtd);
-		map_destroy(flash_mtd);
-		kfree(parsed_parts);
-	}
-}
-
-module_init(omap_toto_mtd_init);
-module_exit(omap_toto_mtd_cleanup);
-
-MODULE_AUTHOR("Jian Zhang");
-MODULE_DESCRIPTION("OMAP Toto board map driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index 5c6a25c9038..48f4cf5cb9d 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -203,15 +203,8 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 		 * not enabled, should we be allocating a new resource for it
 		 * or simply enabling it?
 		 */
-		if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
-				    IORESOURCE_ROM_ENABLE)) {
-		     	u32 val;
-			pci_resource_flags(dev, PCI_ROM_RESOURCE) |= IORESOURCE_ROM_ENABLE;
-			pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-			val |= PCI_ROM_ADDRESS_ENABLE;
-			pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
-			printk("%s: enabling expansion ROM\n", pci_name(dev));
-		}
+		pci_enable_rom(dev);
+		printk("%s: enabling expansion ROM\n", pci_name(dev));
 	}
 
 	if (!len || !base)
@@ -232,18 +225,13 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 static void
 intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
 {
-	u32 val;
-
 	if (map->base)
 		iounmap(map->base);
 
 	/*
 	 * We need to undo the PCI BAR2/PCI ROM BAR address alteration.
 	 */
-	pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~IORESOURCE_ROM_ENABLE;
-	pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-	val &= ~PCI_ROM_ADDRESS_ENABLE;
-	pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+	pci_disable_rom(dev);
 }
 
 static unsigned long
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 49acd417189..5fcfec034a9 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -230,8 +230,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (err == 0) {
-		err = of_mtd_parse_partitions(&dev->dev, info->mtd,
-		                              dp, &info->parts);
+		err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts);
 		if (err < 0)
 			return err;
 	}
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
deleted file mode 100644
index e243476c817..00000000000
--- a/drivers/mtd/maps/walnut.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Mapping for Walnut flash
- * (used ebony.c as a "framework")
- *
- * Heikki Lindholm <holindho@infradead.org>
- *
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm4xx.h>
-#include <platforms/4xx/walnut.h>
-
-/* these should be in platforms/4xx/walnut.h ? */
-#define WALNUT_FLASH_ONBD_N(x)		(x & 0x02)
-#define WALNUT_FLASH_SRAM_SEL(x)	(x & 0x01)
-#define WALNUT_FLASH_LOW		0xFFF00000
-#define WALNUT_FLASH_HIGH		0xFFF80000
-#define WALNUT_FLASH_SIZE		0x80000
-
-static struct mtd_info *flash;
-
-static struct map_info walnut_map = {
-	.name =		"Walnut flash",
-	.size =		WALNUT_FLASH_SIZE,
-	.bankwidth =	1,
-};
-
-/* Actually, OpenBIOS is the last 128 KiB of the flash - better
- * partitioning could be made */
-static struct mtd_partition walnut_partitions[] = {
-	{
-		.name =   "OpenBIOS",
-		.offset = 0x0,
-		.size =   WALNUT_FLASH_SIZE,
-		/*.mask_flags = MTD_WRITEABLE, */ /* force read-only */
-	}
-};
-
-int __init init_walnut(void)
-{
-	u8 fpga_brds1;
-	void *fpga_brds1_adr;
-	void *fpga_status_adr;
-	unsigned long flash_base;
-
-	/* this should already be mapped (platform/4xx/walnut.c) */
-	fpga_status_adr = ioremap(WALNUT_FPGA_BASE, 8);
-	if (!fpga_status_adr)
-		return -ENOMEM;
-
-	fpga_brds1_adr = fpga_status_adr+5;
-	fpga_brds1 = readb(fpga_brds1_adr);
-	/* iounmap(fpga_status_adr); */
-
-	if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
-		printk("The on-board flash is disabled (U79 sw 5)!");
-		iounmap(fpga_status_adr);
-		return -EIO;
-	}
-	if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
-		flash_base = WALNUT_FLASH_LOW;
-	else
-		flash_base = WALNUT_FLASH_HIGH;
-
-	walnut_map.phys = flash_base;
-	walnut_map.virt =
-		(void __iomem *)ioremap(flash_base, walnut_map.size);
-
-	if (!walnut_map.virt) {
-		printk("Failed to ioremap flash.\n");
-		iounmap(fpga_status_adr);
-		return -EIO;
-	}
-
-	simple_map_init(&walnut_map);
-
-	flash = do_map_probe("jedec_probe", &walnut_map);
-	if (flash) {
-		flash->owner = THIS_MODULE;
-		add_mtd_partitions(flash, walnut_partitions,
-					ARRAY_SIZE(walnut_partitions));
-	} else {
-		printk("map probe failed for flash\n");
-		iounmap(fpga_status_adr);
-		return -ENXIO;
-	}
-
-	iounmap(fpga_status_adr);
-	return 0;
-}
-
-static void __exit cleanup_walnut(void)
-{
-	if (flash) {
-		del_mtd_partitions(flash);
-		map_destroy(flash);
-	}
-
-	if (walnut_map.virt) {
-		iounmap((void *)walnut_map.virt);
-		walnut_map.virt = 0;
-	}
-}
-
-module_init(init_walnut);
-module_exit(cleanup_walnut);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Heikki Lindholm <holindho@infradead.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 405GP Walnut boards");
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 1c74762dec8..963840e9b5b 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -348,7 +348,7 @@ static void mtdchar_erase_callback (struct erase_info *instr)
 	wake_up((wait_queue_head_t *)instr->priv);
 }
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
 	struct mtd_info *mtd = mfi->mtd;
@@ -665,7 +665,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 	case OTPSELECT:
 	{
 		int mode;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 2972a5edb73..789842d0e6f 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 			return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* make a local copy of instr to avoid modifying the caller's struct */
 	erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL);
@@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 			/* sanity check: should never happen since
 			 * block alignment has been checked above */
 			BUG_ON(err == -EINVAL);
-			if (erase->fail_addr != 0xffffffff)
+			if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 				instr->fail_addr = erase->fail_addr + offset;
 			break;
 		}
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 5a680e1e61f..aebb3b27edb 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -33,6 +33,7 @@
 #include <linux/interrupt.h>
 #include <linux/mtd/mtd.h>
 
+#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
 #define OOPS_PAGE_SIZE 4096
 
 static struct mtdoops_context {
@@ -99,7 +100,7 @@ static void mtdoops_inc_counter(struct mtdoops_context *cxt)
 	int ret;
 
 	cxt->nextpage++;
-	if (cxt->nextpage > cxt->oops_pages)
+	if (cxt->nextpage >= cxt->oops_pages)
 		cxt->nextpage = 0;
 	cxt->nextcount++;
 	if (cxt->nextcount == 0xffffffff)
@@ -141,7 +142,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 	mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
 	if (mod != 0) {
 		cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 	}
 
@@ -158,7 +159,7 @@ badblock:
 				cxt->nextpage * OOPS_PAGE_SIZE);
 		i++;
 		cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 		if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
 			printk(KERN_ERR "mtdoops: All blocks bad!\n");
@@ -224,40 +225,40 @@ static void find_next_position(struct mtdoops_context *cxt)
 {
 	struct mtd_info *mtd = cxt->mtd;
 	int ret, page, maxpos = 0;
-	u32 count, maxcount = 0xffffffff;
+	u32 count[2], maxcount = 0xffffffff;
 	size_t retlen;
 
 	for (page = 0; page < cxt->oops_pages; page++) {
-		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
-		if ((retlen != 4) || ((ret < 0) && (ret != -EUCLEAN))) {
-			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
+		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 8, &retlen, (u_char *) &count[0]);
+		if ((retlen != 8) || ((ret < 0) && (ret != -EUCLEAN))) {
+			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 8 read)"
 				", err %d.\n", page * OOPS_PAGE_SIZE, retlen, ret);
 			continue;
 		}
 
-		if (count == 0xffffffff)
+		if (count[1] != MTDOOPS_KERNMSG_MAGIC)
+			continue;
+		if (count[0] == 0xffffffff)
 			continue;
 		if (maxcount == 0xffffffff) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] < 0x40000000) && (maxcount > 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count < 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] > maxcount) && (count[0] < 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count > 0xc0000000)
+		} else if ((count[0] > maxcount) && (count[0] > 0xc0000000)
 					&& (maxcount > 0x80000000)) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
 		}
 	}
 	if (maxcount == 0xffffffff) {
 		cxt->nextpage = 0;
 		cxt->nextcount = 1;
-		cxt->ready = 1;
-		printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
-				cxt->nextpage, cxt->nextcount);
+		schedule_work(&cxt->work_erase);
 		return;
 	}
 
@@ -358,8 +359,9 @@ mtdoops_console_write(struct console *co, const char *s, unsigned int count)
 
 	if (cxt->writecount == 0) {
 		u32 *stamp = cxt->oops_buf;
-		*stamp = cxt->nextcount;
-		cxt->writecount = 4;
+		*stamp++ = cxt->nextcount;
+		*stamp = MTDOOPS_KERNMSG_MAGIC;
+		cxt->writecount = 8;
 	}
 
 	if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9a06dc93ee0..3728913fa5f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	instr->addr += part->offset;
 	ret = part->master->erase(part->master, instr);
 	if (ret) {
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
@@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr)
 	if (instr->mtd->erase == part_erase) {
 		struct mtd_part *part = PART(instr->mtd);
 
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 41f361c49b3..1c2e9450d66 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -56,6 +56,12 @@ config MTD_NAND_H1900
 	help
 	  This enables the driver for the iPAQ h1900 flash.
 
+config MTD_NAND_GPIO
+	tristate "GPIO NAND Flash driver"
+	depends on GENERIC_GPIO && ARM
+	help
+	  This enables a GPIO based NAND flash driver.
+
 config MTD_NAND_SPIA
 	tristate "NAND Flash device on SPIA board"
 	depends on ARCH_P720T
@@ -68,12 +74,6 @@ config MTD_NAND_AMS_DELTA
 	help
 	  Support for NAND flash on Amstrad E3 (Delta).
 
-config MTD_NAND_TOTO
-	tristate "NAND Flash device on TOTO board"
-	depends on ARCH_OMAP && BROKEN
-	help
-	  Support for NAND flash on Texas Instruments Toto platform.
-
 config MTD_NAND_TS7250
 	tristate "NAND Flash device on TS-7250 board"
 	depends on MACH_TS72XX
@@ -163,13 +163,6 @@ config MTD_NAND_S3C2410_HWECC
 	  incorrect ECC generation, and if using these, the default of
 	  software ECC is preferable.
 
-config MTD_NAND_NDFC
-	tristate "NDFC NanD Flash Controller"
-	depends on 4xx && !PPC_MERGE
-	select MTD_NAND_ECC_SMC
-	help
-	 NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
-
 config MTD_NAND_S3C2410_CLKSTOP
 	bool "S3C2410 NAND IDLE clock stop"
 	depends on MTD_NAND_S3C2410
@@ -340,6 +333,13 @@ config MTD_NAND_PXA3xx
 	  This enables the driver for the NAND flash device found on
 	  PXA3xx processors
 
+config MTD_NAND_PXA3xx_BUILTIN
+	bool "Use builtin definitions for some NAND chips (deprecated)"
+	depends on MTD_NAND_PXA3xx
+	help
+	  This enables builtin definitions for some NAND chips. This
+	  is deprecated in favor of platform specific data.
+
 config MTD_NAND_CM_X270
 	tristate "Support for NAND Flash on CM-X270 modules"
 	depends on MTD_NAND && MACH_ARMCORE
@@ -400,10 +400,24 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_UPM
 	tristate "Support for NAND on Freescale UPM"
-	depends on MTD_NAND && OF_GPIO && (PPC_83xx || PPC_85xx)
+	depends on MTD_NAND && (PPC_83xx || PPC_85xx)
 	select FSL_LBC
 	help
 	  Enables support for NAND Flash chips wired onto Freescale PowerPC
 	  processor localbus with User-Programmable Machine support.
 
+config MTD_NAND_MXC
+	tristate "MXC NAND support"
+	depends on ARCH_MX2
+	help
+	  This enables the driver for the NAND flash controller on the
+	  MXC processors.
+
+config MTD_NAND_SH_FLCTL
+	tristate "Support for NAND on Renesas SuperH FLCTL"
+	depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+	help
+	  Several Renesas SuperH CPUs have FLCTL. This option enables support
+	  for NAND Flash using FLCTL. This driver supports SH7723.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index b786c5da82d..b661586afbf 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
 obj-$(CONFIG_MTD_NAND_SPIA)		+= spia.o
 obj-$(CONFIG_MTD_NAND_AMS_DELTA)	+= ams-delta.o
-obj-$(CONFIG_MTD_NAND_TOTO)		+= toto.o
 obj-$(CONFIG_MTD_NAND_AUTCPU12)		+= autcpu12.o
 obj-$(CONFIG_MTD_NAND_EDB7312)		+= edb7312.o
 obj-$(CONFIG_MTD_NAND_AU1550)		+= au1550nd.o
@@ -24,6 +23,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)		+= cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)		+= ndfc.o
 obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel_nand.o
+obj-$(CONFIG_MTD_NAND_GPIO)		+= gpio.o
 obj-$(CONFIG_MTD_NAND_CM_X270)		+= cmx270_nand.o
 obj-$(CONFIG_MTD_NAND_BASLER_EXCITE)	+= excite_nandflash.o
 obj-$(CONFIG_MTD_NAND_PXA3xx)		+= pxa3xx_nand.o
@@ -34,5 +34,7 @@ obj-$(CONFIG_MTD_NAND_PASEMI)		+= pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)		+= fsl_upm.o
+obj-$(CONFIG_MTD_NAND_SH_FLCTL)		+= sh_flctl.o
+obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 3387e0d5076..c98c1570a40 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -174,48 +174,6 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
 }
 
 /*
- * write oob for small pages
- */
-static int atmel_nand_write_oob_512(struct mtd_info *mtd,
-		struct nand_chip *chip, int page)
-{
-	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
-	int eccsize = chip->ecc.size, length = mtd->oobsize;
-	int len, pos, status = 0;
-	const uint8_t *bufpoi = chip->oob_poi;
-
-	pos = eccsize + chunk;
-
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
-	len = min_t(int, length, chunk);
-	chip->write_buf(mtd, bufpoi, len);
-	bufpoi += len;
-	length -= len;
-	if (length > 0)
-		chip->write_buf(mtd, bufpoi, length);
-
-	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-
-	return status & NAND_STATUS_FAIL ? -EIO : 0;
-
-}
-
-/*
- * read oob for small pages
- */
-static int atmel_nand_read_oob_512(struct mtd_info *mtd,
-		struct nand_chip *chip,	int page, int sndcmd)
-{
-	if (sndcmd) {
-		chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-		sndcmd = 0;
-	}
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return sndcmd;
-}
-
-/*
  * Calculate HW ECC
  *
  * function called after a write
@@ -235,14 +193,14 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
 	/* get the first 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, PR);
 
-	ecc_code[eccpos[0]] = ecc_value & 0xFF;
-	ecc_code[eccpos[1]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[0] = ecc_value & 0xFF;
+	ecc_code[1] = (ecc_value >> 8) & 0xFF;
 
 	/* get the last 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
 
-	ecc_code[eccpos[2]] = ecc_value & 0xFF;
-	ecc_code[eccpos[3]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[2] = ecc_value & 0xFF;
+	ecc_code[3] = (ecc_value >> 8) & 0xFF;
 
 	return 0;
 }
@@ -476,14 +434,12 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 			res = -EIO;
 			goto err_ecc_ioremap;
 		}
-		nand_chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+		nand_chip->ecc.mode = NAND_ECC_HW;
 		nand_chip->ecc.calculate = atmel_nand_calculate;
 		nand_chip->ecc.correct = atmel_nand_correct;
 		nand_chip->ecc.hwctl = atmel_nand_hwctl;
 		nand_chip->ecc.read_page = atmel_nand_read_page;
 		nand_chip->ecc.bytes = 4;
-		nand_chip->ecc.prepad = 0;
-		nand_chip->ecc.postpad = 0;
 	}
 
 	nand_chip->chip_delay = 20;		/* 20us command delay time */
@@ -514,7 +470,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		goto err_scan_ident;
 	}
 
-	if (nand_chip->ecc.mode == NAND_ECC_HW_SYNDROME) {
+	if (nand_chip->ecc.mode == NAND_ECC_HW) {
 		/* ECC is calculated for the whole page (1 step) */
 		nand_chip->ecc.size = mtd->writesize;
 
@@ -522,8 +478,6 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		switch (mtd->writesize) {
 		case 512:
 			nand_chip->ecc.layout = &atmel_oobinfo_small;
-			nand_chip->ecc.read_oob = atmel_nand_read_oob_512;
-			nand_chip->ecc.write_oob = atmel_nand_write_oob_512;
 			ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
 			break;
 		case 1024:
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 3370a800fd3..9f1b451005c 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -289,8 +289,10 @@ static int __init cs553x_init(void)
 	int i;
 	uint64_t val;
 
+#ifdef CONFIG_MTD_PARTITIONS
 	int mtd_parts_nb = 0;
 	struct mtd_partition *mtd_parts = NULL;
+#endif
 
 	/* If the CPU isn't a Geode GX or LX, abort */
 	if (!is_geode())
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 98ad3cefcaf..4aa5bd6158d 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -918,8 +918,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (ret == 0) {
-		ret = of_mtd_parse_partitions(priv->dev, &priv->mtd,
-		                              node, &parts);
+		ret = of_mtd_parse_partitions(priv->dev, node, &parts);
 		if (ret < 0)
 			goto err;
 	}
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 1ebfd87f00b..024e3fffd4b 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -36,8 +37,6 @@ struct fsl_upm_nand {
 	uint8_t upm_cmd_offset;
 	void __iomem *io_base;
 	int rnb_gpio;
-	const uint32_t *wait_pattern;
-	const uint32_t *wait_write;
 	int chip_delay;
 };
 
@@ -61,10 +60,11 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
 	if (fun->rnb_gpio >= 0) {
 		while (--cnt && !fun_chip_ready(&fun->mtd))
 			cpu_relax();
+		if (!cnt)
+			dev_err(fun->dev, "tired waiting for RNB\n");
+	} else {
+		ndelay(100);
 	}
-
-	if (!cnt)
-		dev_err(fun->dev, "tired waiting for RNB\n");
 }
 
 static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -89,8 +89,7 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 
 	fsl_upm_run_pattern(&fun->upm, fun->io_base, cmd);
 
-	if (fun->wait_pattern)
-		fun_wait_rnb(fun);
+	fun_wait_rnb(fun);
 }
 
 static uint8_t fun_read_byte(struct mtd_info *mtd)
@@ -116,14 +115,16 @@ static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 
 	for (i = 0; i < len; i++) {
 		out_8(fun->chip.IO_ADDR_W, buf[i]);
-		if (fun->wait_write)
-			fun_wait_rnb(fun);
+		fun_wait_rnb(fun);
 	}
 }
 
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
+static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+				   const struct device_node *upm_np,
+				   const struct resource *io_res)
 {
 	int ret;
+	struct device_node *flash_np;
 #ifdef CONFIG_MTD_PARTITIONS
 	static const char *part_types[] = { "cmdlinepart", NULL, };
 #endif
@@ -143,18 +144,37 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
 	fun->mtd.priv = &fun->chip;
 	fun->mtd.owner = THIS_MODULE;
 
+	flash_np = of_get_next_child(upm_np, NULL);
+	if (!flash_np)
+		return -ENODEV;
+
+	fun->mtd.name = kasprintf(GFP_KERNEL, "%x.%s", io_res->start,
+				  flash_np->name);
+	if (!fun->mtd.name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	ret = nand_scan(&fun->mtd, 1);
 	if (ret)
-		return ret;
-
-	fun->mtd.name = fun->dev->bus_id;
+		goto err;
 
 #ifdef CONFIG_MTD_PARTITIONS
 	ret = parse_mtd_partitions(&fun->mtd, part_types, &fun->parts, 0);
+
+#ifdef CONFIG_MTD_OF_PARTS
+	if (ret == 0)
+		ret = of_mtd_parse_partitions(fun->dev, &fun->mtd,
+					      flash_np, &fun->parts);
+#endif
 	if (ret > 0)
-		return add_mtd_partitions(&fun->mtd, fun->parts, ret);
+		ret = add_mtd_partitions(&fun->mtd, fun->parts, ret);
+	else
 #endif
-	return add_mtd_device(&fun->mtd);
+		ret = add_mtd_device(&fun->mtd);
+err:
+	of_node_put(flash_np);
+	return ret;
 }
 
 static int __devinit fun_probe(struct of_device *ofdev,
@@ -211,6 +231,12 @@ static int __devinit fun_probe(struct of_device *ofdev,
 		goto err2;
 	}
 
+	prop = of_get_property(ofdev->node, "chip-delay", NULL);
+	if (prop)
+		fun->chip_delay = *prop;
+	else
+		fun->chip_delay = 50;
+
 	fun->io_base = devm_ioremap_nocache(&ofdev->dev, io_res.start,
 					  io_res.end - io_res.start + 1);
 	if (!fun->io_base) {
@@ -220,17 +246,8 @@ static int __devinit fun_probe(struct of_device *ofdev,
 
 	fun->dev = &ofdev->dev;
 	fun->last_ctrl = NAND_CLE;
-	fun->wait_pattern = of_get_property(ofdev->node, "fsl,wait-pattern",
-					    NULL);
-	fun->wait_write = of_get_property(ofdev->node, "fsl,wait-write", NULL);
-
-	prop = of_get_property(ofdev->node, "chip-delay", NULL);
-	if (prop)
-		fun->chip_delay = *prop;
-	else
-		fun->chip_delay = 50;
 
-	ret = fun_chip_init(fun);
+	ret = fun_chip_init(fun, ofdev->node, &io_res);
 	if (ret)
 		goto err2;
 
@@ -251,6 +268,7 @@ static int __devexit fun_remove(struct of_device *ofdev)
 	struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
 
 	nand_release(&fun->mtd);
+	kfree(fun->mtd.name);
 
 	if (fun->rnb_gpio >= 0)
 		gpio_free(fun->rnb_gpio);
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
new file mode 100644
index 00000000000..8f902e75aa8
--- /dev/null
+++ b/drivers/mtd/nand/gpio.c
@@ -0,0 +1,375 @@
+/*
+ * drivers/mtd/nand/gpio.c
+ *
+ * Updated, and converted to generic GPIO based driver by Russell King.
+ *
+ * Written by Ben Dooks <ben@simtec.co.uk>
+ *   Based on 2.4 version by Mark Whittaker
+ *
+ * © 2004 Simtec Electronics
+ *
+ * Device driver for NAND connected via GPIO
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand-gpio.h>
+
+struct gpiomtd {
+	void __iomem		*io_sync;
+	struct mtd_info		mtd_info;
+	struct nand_chip	nand_chip;
+	struct gpio_nand_platdata plat;
+};
+
+#define gpio_nand_getpriv(x) container_of(x, struct gpiomtd, mtd_info)
+
+
+#ifdef CONFIG_ARM
+/* gpio_nand_dosync()
+ *
+ * Make sure the GPIO state changes occur in-order with writes to NAND
+ * memory region.
+ * Needed on PXA due to bus-reordering within the SoC itself (see section on
+ * I/O ordering in PXA manual (section 2.3, p35)).
+ */
+static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
+{
+	unsigned long tmp;
+
+	if (gpiomtd->io_sync) {
+		/*
+		 * Linux memory barriers don't cater for what's required here.
+		 * What's required is what's here - a read from a separate
+		 * region with a dependency on that read.
+		 */
+		tmp = readl(gpiomtd->io_sync);
+		asm volatile("mov %1, %0\n" : "=r" (tmp) : "r" (tmp));
+	}
+}
+#else
+static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
+#endif
+
+static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+
+	gpio_nand_dosync(gpiomtd);
+
+	if (ctrl & NAND_CTRL_CHANGE) {
+		gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+		gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
+		gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
+		gpio_nand_dosync(gpiomtd);
+	}
+	if (cmd == NAND_CMD_NONE)
+		return;
+
+	writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
+	gpio_nand_dosync(gpiomtd);
+}
+
+static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	writesb(this->IO_ADDR_W, buf, len);
+}
+
+static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	readsb(this->IO_ADDR_R, buf, len);
+}
+
+static int gpio_nand_verifybuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+	unsigned char read, *p = (unsigned char *) buf;
+	int i, err = 0;
+
+	for (i = 0; i < len; i++) {
+		read = readb(this->IO_ADDR_R);
+		if (read != p[i]) {
+			pr_debug("%s: err at %d (read %04x vs %04x)\n",
+			       __func__, i, read, p[i]);
+			err = -EFAULT;
+		}
+	}
+	return err;
+}
+
+static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
+				 int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	if (IS_ALIGNED((unsigned long)buf, 2)) {
+		writesw(this->IO_ADDR_W, buf, len>>1);
+	} else {
+		int i;
+		unsigned short *ptr = (unsigned short *)buf;
+
+		for (i = 0; i < len; i += 2, ptr++)
+			writew(*ptr, this->IO_ADDR_W);
+	}
+}
+
+static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *this = mtd->priv;
+
+	if (IS_ALIGNED((unsigned long)buf, 2)) {
+		readsw(this->IO_ADDR_R, buf, len>>1);
+	} else {
+		int i;
+		unsigned short *ptr = (unsigned short *)buf;
+
+		for (i = 0; i < len; i += 2, ptr++)
+			*ptr = readw(this->IO_ADDR_R);
+	}
+}
+
+static int gpio_nand_verifybuf16(struct mtd_info *mtd, const u_char *buf,
+				 int len)
+{
+	struct nand_chip *this = mtd->priv;
+	unsigned short read, *p = (unsigned short *) buf;
+	int i, err = 0;
+	len >>= 1;
+
+	for (i = 0; i < len; i++) {
+		read = readw(this->IO_ADDR_R);
+		if (read != p[i]) {
+			pr_debug("%s: err at %d (read %04x vs %04x)\n",
+			       __func__, i, read, p[i]);
+			err = -EFAULT;
+		}
+	}
+	return err;
+}
+
+
+static int gpio_nand_devready(struct mtd_info *mtd)
+{
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+	return gpio_get_value(gpiomtd->plat.gpio_rdy);
+}
+
+static int __devexit gpio_nand_remove(struct platform_device *dev)
+{
+	struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
+	struct resource *res;
+
+	nand_release(&gpiomtd->mtd_info);
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	iounmap(gpiomtd->io_sync);
+	if (res)
+		release_mem_region(res->start, res->end - res->start + 1);
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+	release_mem_region(res->start, res->end - res->start + 1);
+
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+	gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+
+	gpio_free(gpiomtd->plat.gpio_cle);
+	gpio_free(gpiomtd->plat.gpio_ale);
+	gpio_free(gpiomtd->plat.gpio_nce);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_free(gpiomtd->plat.gpio_nwp);
+	gpio_free(gpiomtd->plat.gpio_rdy);
+
+	kfree(gpiomtd);
+
+	return 0;
+}
+
+static void __iomem *request_and_remap(struct resource *res, size_t size,
+					const char *name, int *err)
+{
+	void __iomem *ptr;
+
+	if (!request_mem_region(res->start, res->end - res->start + 1, name)) {
+		*err = -EBUSY;
+		return NULL;
+	}
+
+	ptr = ioremap(res->start, size);
+	if (!ptr) {
+		release_mem_region(res->start, res->end - res->start + 1);
+		*err = -ENOMEM;
+	}
+	return ptr;
+}
+
+static int __devinit gpio_nand_probe(struct platform_device *dev)
+{
+	struct gpiomtd *gpiomtd;
+	struct nand_chip *this;
+	struct resource *res0, *res1;
+	int ret;
+
+	if (!dev->dev.platform_data)
+		return -EINVAL;
+
+	res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res0)
+		return -EINVAL;
+
+	gpiomtd = kzalloc(sizeof(*gpiomtd), GFP_KERNEL);
+	if (gpiomtd == NULL) {
+		dev_err(&dev->dev, "failed to create NAND MTD\n");
+		return -ENOMEM;
+	}
+
+	this = &gpiomtd->nand_chip;
+	this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
+	if (!this->IO_ADDR_R) {
+		dev_err(&dev->dev, "unable to map NAND\n");
+		goto err_map;
+	}
+
+	res1 = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	if (res1) {
+		gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
+		if (!gpiomtd->io_sync) {
+			dev_err(&dev->dev, "unable to map sync NAND\n");
+			goto err_sync;
+		}
+	}
+
+	memcpy(&gpiomtd->plat, dev->dev.platform_data, sizeof(gpiomtd->plat));
+
+	ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
+	if (ret)
+		goto err_nce;
+	gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
+		ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
+		if (ret)
+			goto err_nwp;
+		gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
+	}
+	ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
+	if (ret)
+		goto err_ale;
+	gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
+	ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
+	if (ret)
+		goto err_cle;
+	gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
+	ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+	if (ret)
+		goto err_rdy;
+	gpio_direction_input(gpiomtd->plat.gpio_rdy);
+
+
+	this->IO_ADDR_W  = this->IO_ADDR_R;
+	this->ecc.mode   = NAND_ECC_SOFT;
+	this->options    = gpiomtd->plat.options;
+	this->chip_delay = gpiomtd->plat.chip_delay;
+
+	/* install our routines */
+	this->cmd_ctrl   = gpio_nand_cmd_ctrl;
+	this->dev_ready  = gpio_nand_devready;
+
+	if (this->options & NAND_BUSWIDTH_16) {
+		this->read_buf   = gpio_nand_readbuf16;
+		this->write_buf  = gpio_nand_writebuf16;
+		this->verify_buf = gpio_nand_verifybuf16;
+	} else {
+		this->read_buf   = gpio_nand_readbuf;
+		this->write_buf  = gpio_nand_writebuf;
+		this->verify_buf = gpio_nand_verifybuf;
+	}
+
+	/* set the mtd private data for the nand driver */
+	gpiomtd->mtd_info.priv = this;
+	gpiomtd->mtd_info.owner = THIS_MODULE;
+
+	if (nand_scan(&gpiomtd->mtd_info, 1)) {
+		dev_err(&dev->dev, "no nand chips found?\n");
+		ret = -ENXIO;
+		goto err_wp;
+	}
+
+	if (gpiomtd->plat.adjust_parts)
+		gpiomtd->plat.adjust_parts(&gpiomtd->plat,
+					   gpiomtd->mtd_info.size);
+
+	add_mtd_partitions(&gpiomtd->mtd_info, gpiomtd->plat.parts,
+			   gpiomtd->plat.num_parts);
+	platform_set_drvdata(dev, gpiomtd);
+
+	return 0;
+
+err_wp:
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+	gpio_free(gpiomtd->plat.gpio_rdy);
+err_rdy:
+	gpio_free(gpiomtd->plat.gpio_cle);
+err_cle:
+	gpio_free(gpiomtd->plat.gpio_ale);
+err_ale:
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_free(gpiomtd->plat.gpio_nwp);
+err_nwp:
+	gpio_free(gpiomtd->plat.gpio_nce);
+err_nce:
+	iounmap(gpiomtd->io_sync);
+	if (res1)
+		release_mem_region(res1->start, res1->end - res1->start + 1);
+err_sync:
+	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+	release_mem_region(res0->start, res0->end - res0->start + 1);
+err_map:
+	kfree(gpiomtd);
+	return ret;
+}
+
+static struct platform_driver gpio_nand_driver = {
+	.probe		= gpio_nand_probe,
+	.remove		= gpio_nand_remove,
+	.driver		= {
+		.name	= "gpio-nand",
+	},
+};
+
+static int __init gpio_nand_init(void)
+{
+	printk(KERN_INFO "GPIO NAND driver, © 2004 Simtec Electronics\n");
+
+	return platform_driver_register(&gpio_nand_driver);
+}
+
+static void __exit gpio_nand_exit(void)
+{
+	platform_driver_unregister(&gpio_nand_driver);
+}
+
+module_init(gpio_nand_init);
+module_exit(gpio_nand_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_DESCRIPTION("GPIO NAND Driver");
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
new file mode 100644
index 00000000000..21fd4f1c480
--- /dev/null
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <asm/mach/flash.h>
+#include <mach/mxc_nand.h>
+
+#define DRIVER_NAME "mxc_nand"
+
+/* Addresses for NFC registers */
+#define NFC_BUF_SIZE		0xE00
+#define NFC_BUF_ADDR		0xE04
+#define NFC_FLASH_ADDR		0xE06
+#define NFC_FLASH_CMD		0xE08
+#define NFC_CONFIG		0xE0A
+#define NFC_ECC_STATUS_RESULT	0xE0C
+#define NFC_RSLTMAIN_AREA	0xE0E
+#define NFC_RSLTSPARE_AREA	0xE10
+#define NFC_WRPROT		0xE12
+#define NFC_UNLOCKSTART_BLKADDR	0xE14
+#define NFC_UNLOCKEND_BLKADDR	0xE16
+#define NFC_NF_WRPRST		0xE18
+#define NFC_CONFIG1		0xE1A
+#define NFC_CONFIG2		0xE1C
+
+/* Addresses for NFC RAM BUFFER Main area 0 */
+#define MAIN_AREA0		0x000
+#define MAIN_AREA1		0x200
+#define MAIN_AREA2		0x400
+#define MAIN_AREA3		0x600
+
+/* Addresses for NFC SPARE BUFFER Spare area 0 */
+#define SPARE_AREA0		0x800
+#define SPARE_AREA1		0x810
+#define SPARE_AREA2		0x820
+#define SPARE_AREA3		0x830
+
+/* Set INT to 0, FCMD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Command operation */
+#define NFC_CMD            0x1
+
+/* Set INT to 0, FADD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Address operation */
+#define NFC_ADDR           0x2
+
+/* Set INT to 0, FDI to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Input operation */
+#define NFC_INPUT          0x4
+
+/* Set INT to 0, FDO to 001, rest to 0 in NFC_CONFIG2 Register
+ * for Data Output operation */
+#define NFC_OUTPUT         0x8
+
+/* Set INT to 0, FDO to 010, rest to 0 in NFC_CONFIG2 Register
+ * for Read ID operation */
+#define NFC_ID             0x10
+
+/* Set INT to 0, FDO to 100, rest to 0 in NFC_CONFIG2 Register
+ * for Read Status operation */
+#define NFC_STATUS         0x20
+
+/* Set INT to 1, rest to 0 in NFC_CONFIG2 Register for Read
+ * Status operation */
+#define NFC_INT            0x8000
+
+#define NFC_SP_EN           (1 << 2)
+#define NFC_ECC_EN          (1 << 3)
+#define NFC_INT_MSK         (1 << 4)
+#define NFC_BIG             (1 << 5)
+#define NFC_RST             (1 << 6)
+#define NFC_CE              (1 << 7)
+#define NFC_ONE_CYCLE       (1 << 8)
+
+struct mxc_nand_host {
+	struct mtd_info		mtd;
+	struct nand_chip	nand;
+	struct mtd_partition	*parts;
+	struct device		*dev;
+
+	void __iomem		*regs;
+	int			spare_only;
+	int			status_request;
+	int			pagesize_2k;
+	uint16_t		col_addr;
+	struct clk		*clk;
+	int			clk_act;
+	int			irq;
+
+	wait_queue_head_t	irq_waitq;
+};
+
+/* Define delays in microsec for NAND device operations */
+#define TROP_US_DELAY   2000
+/* Macros to get byte and bit positions of ECC */
+#define COLPOS(x)  ((x) >> 3)
+#define BITPOS(x) ((x) & 0xf)
+
+/* Define single bit Error positions in Main & Spare area */
+#define MAIN_SINGLEBIT_ERROR 0x4
+#define SPARE_SINGLEBIT_ERROR 0x1
+
+/* OOB placement block for use with hardware ecc generation */
+static struct nand_ecclayout nand_hw_eccoob_8 = {
+	.eccbytes = 5,
+	.eccpos = {6, 7, 8, 9, 10},
+	.oobfree = {{0, 5}, {11, 5}, }
+};
+
+static struct nand_ecclayout nand_hw_eccoob_16 = {
+	.eccbytes = 5,
+	.eccpos = {6, 7, 8, 9, 10},
+	.oobfree = {{0, 6}, {12, 4}, }
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
+#endif
+
+static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
+{
+	struct mxc_nand_host *host = dev_id;
+
+	uint16_t tmp;
+
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp |= NFC_INT_MSK; /* Disable interrupt */
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	wake_up(&host->irq_waitq);
+
+	return IRQ_HANDLED;
+}
+
+/* This function polls the NANDFC to wait for the basic operation to
+ * complete by checking the INT bit of config2 register.
+ */
+static void wait_op_done(struct mxc_nand_host *host, int max_retries,
+				uint16_t param, int useirq)
+{
+	uint32_t tmp;
+
+	if (useirq) {
+		if ((readw(host->regs + NFC_CONFIG2) & NFC_INT) == 0) {
+
+			tmp = readw(host->regs + NFC_CONFIG1);
+			tmp  &= ~NFC_INT_MSK;	/* Enable interrupt */
+			writew(tmp, host->regs + NFC_CONFIG1);
+
+			wait_event(host->irq_waitq,
+				readw(host->regs + NFC_CONFIG2) & NFC_INT);
+
+			tmp = readw(host->regs + NFC_CONFIG2);
+			tmp  &= ~NFC_INT;
+			writew(tmp, host->regs + NFC_CONFIG2);
+		}
+	} else {
+		while (max_retries-- > 0) {
+			if (readw(host->regs + NFC_CONFIG2) & NFC_INT) {
+				tmp = readw(host->regs + NFC_CONFIG2);
+				tmp  &= ~NFC_INT;
+				writew(tmp, host->regs + NFC_CONFIG2);
+				break;
+			}
+			udelay(1);
+		}
+		if (max_retries <= 0)
+			DEBUG(MTD_DEBUG_LEVEL0, "%s(%d): INT not set\n",
+			      __func__, param);
+	}
+}
+
+/* This function issues the specified command to the NAND device and
+ * waits for completion. */
+static void send_cmd(struct mxc_nand_host *host, uint16_t cmd, int useirq)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_cmd(host, 0x%x, %d)\n", cmd, useirq);
+
+	writew(cmd, host->regs + NFC_FLASH_CMD);
+	writew(NFC_CMD, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, cmd, useirq);
+}
+
+/* This function sends an address (or partial address) to the
+ * NAND device. The address is used to select the source/destination for
+ * a NAND command. */
+static void send_addr(struct mxc_nand_host *host, uint16_t addr, int islast)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_addr(host, 0x%x %d)\n", addr, islast);
+
+	writew(addr, host->regs + NFC_FLASH_ADDR);
+	writew(NFC_ADDR, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, addr, islast);
+}
+
+/* This function requests the NANDFC to initiate the transfer
+ * of data currently in the NANDFC RAM buffer to the NAND device. */
+static void send_prog_page(struct mxc_nand_host *host, uint8_t buf_id,
+			int spare_only)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_prog_page (%d)\n", spare_only);
+
+	/* NANDFC buffer 0 is used for page read/write */
+	writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+	/* Configure spare or page+spare access */
+	if (!host->pagesize_2k) {
+		uint16_t config1 = readw(host->regs + NFC_CONFIG1);
+		if (spare_only)
+			config1 |= NFC_SP_EN;
+		else
+			config1 &= ~(NFC_SP_EN);
+		writew(config1, host->regs + NFC_CONFIG1);
+	}
+
+	writew(NFC_INPUT, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Requests the NANDFC to initiate the transfer of data from the
+ * NAND device into the NANDFC RAM buffer. */
+static void send_read_page(struct mxc_nand_host *host, uint8_t buf_id,
+		int spare_only)
+{
+	DEBUG(MTD_DEBUG_LEVEL3, "send_read_page (%d)\n", spare_only);
+
+	/* NANDFC buffer 0 is used for page read/write */
+	writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+	/* Configure spare or page+spare access */
+	if (!host->pagesize_2k) {
+		uint32_t config1 = readw(host->regs + NFC_CONFIG1);
+		if (spare_only)
+			config1 |= NFC_SP_EN;
+		else
+			config1 &= ~NFC_SP_EN;
+		writew(config1, host->regs + NFC_CONFIG1);
+	}
+
+	writew(NFC_OUTPUT, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Request the NANDFC to read the NAND device ID into main buffer 0. */
+static void send_read_id(struct mxc_nand_host *host)
+{
+	struct nand_chip *this = &host->nand;
+	uint16_t tmp;
+
+	/* NANDFC buffer 0 is used for device ID output */
+	writew(0x0, host->regs + NFC_BUF_ADDR);
+
+	/* Clear NFC_SP_EN: read the ID into the main buffer */
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp &= ~NFC_SP_EN;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	writew(NFC_ID, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, 0, true);
+
+	if (this->options & NAND_BUSWIDTH_16) {
+		void __iomem *main_buf = host->regs + MAIN_AREA0;
+		/* 16-bit bus: pack the bytes at offsets 2,4,..,10 down to 1..5 */
+		writeb(readb(main_buf + 2), main_buf + 1);
+		writeb(readb(main_buf + 4), main_buf + 2);
+		writeb(readb(main_buf + 6), main_buf + 3);
+		writeb(readb(main_buf + 8), main_buf + 4);
+		writeb(readb(main_buf + 10), main_buf + 5);
+	}
+}
+
+/* Request the NANDFC to read the NAND device status and return it.
+ * The first word of main area 1 is saved and restored around the read. */
+static uint16_t get_dev_status(struct mxc_nand_host *host)
+{
+	void __iomem *main_buf = host->regs + MAIN_AREA1;
+	uint32_t store;
+	uint16_t ret, tmp;
+	/* Issue status request to NAND device */
+
+	/* Save the first word of main area 1; it is restored below */
+	store = readl(main_buf);
+	/* NANDFC buffer 1 is used for device status to prevent
+	 * corruption of read/write buffer on status requests. */
+	writew(1, host->regs + NFC_BUF_ADDR);
+
+	/* Clear NFC_SP_EN: read the status into the main buffer */
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp &= ~NFC_SP_EN;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	writew(NFC_STATUS, host->regs + NFC_CONFIG2);
+
+	/* Wait for operation to complete */
+	wait_op_done(host, TROP_US_DELAY, 0, true);
+
+	/* Status is placed in first word of main buffer */
+	/* Fetch the status, then restore the saved area 1 word */
+	ret = readw(main_buf);
+	writel(store, main_buf);
+
+	return ret;
+}
+
+/* Used by the upper layer to check whether the device is ready */
+static int mxc_nand_dev_ready(struct mtd_info *mtd)
+{
+	/*
+	 * NFC handles R/B internally. Therefore, this function
+	 * always returns status as ready.
+	 */
+	return 1;
+}
+
+static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+{
+	/*
+	 * Intentionally empty: when HW ECC is used, NFC_ECC_EN is set once
+	 * in mxcnd_probe(), so there is nothing to enable per operation.
+	 */
+}
+
+static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+				 u_char *read_ecc, u_char *calc_ecc)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+
+	/*
+	 * 1-bit errors are fixed by the HW.  2-bit errors cannot be fixed:
+	 * return -1 if either status field tested below reads 2.
+	 * NOTE(review): (ecc_status >> 2) is unmasked - confirm bits 4+ are 0.
+	 */
+	uint16_t ecc_status = readw(host->regs + NFC_ECC_STATUS_RESULT);
+
+	if (((ecc_status & 0x3) == 2) || ((ecc_status >> 2) == 2)) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "MXC_NAND: HWECC uncorrectable 2-bit ECC error\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+				  u_char *ecc_code)
+{
+	return 0;	/* no software ECC here: ecc_code is left untouched */
+}
+
+static u_char mxc_nand_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	uint8_t ret = 0;
+	uint16_t col, rd_word;
+	uint16_t __iomem *main_buf = host->regs + MAIN_AREA0;
+	uint16_t __iomem *spare_buf = host->regs + SPARE_AREA0;
+
+	/* After NAND_CMD_STATUS, return the low byte of the device status */
+	if (host->status_request)
+		return get_dev_status(host) & 0xFF;
+
+	/* The buffers are read as 16-bit words: byte column -> word index */
+	col = host->col_addr >> 1;
+
+	/* If we are accessing the spare region */
+	if (host->spare_only)
+		rd_word = readw(&spare_buf[col]);
+	else
+		rd_word = readw(&main_buf[col]);
+
+	/* Odd column: upper byte of the word; even column: lower byte */
+	if (host->col_addr & 0x1)
+		ret = (rd_word >> 8) & 0xFF;
+	else
+		ret = rd_word & 0xFF;
+
+	/* Update saved column address */
+	host->col_addr++;
+
+	return ret;
+}
+
+static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	uint16_t col, rd_word, ret;
+	uint16_t __iomem *p;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_read_word(col = %d)\n", host->col_addr);
+
+	col = host->col_addr;
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	/* Byte offsets (as in mxc_nand_read_buf): word that holds byte col */
+	if (col < mtd->writesize)
+		p = host->regs + MAIN_AREA0 + (col & ~1);
+	else
+		p = host->regs + SPARE_AREA0 + ((col - mtd->writesize) & ~1);
+
+	if (col & 1) {
+		/* Odd column: high byte of this word, low byte of the next */
+		rd_word = readw(p);
+		ret = (rd_word >> 8) & 0xff;
+		rd_word = readw(&p[1]);
+		ret |= (rd_word << 8) & 0xff00;
+	} else
+		ret = readw(p);
+
+	host->col_addr = col + 2;	/* update saved column address */
+
+	return ret;
+}
+
+/* Copy up to len bytes from buf into the NANDFC RAM buffer, starting at the
+ * saved column address (moved into the spare area when spare_only is set).
+ * The copy is clamped to the end of page + OOB; col_addr is advanced. */
+static void mxc_nand_write_buf(struct mtd_info *mtd,
+				const u_char *buf, int len)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int n, col, i = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_write_buf(col = %d, len = %d)\n", host->col_addr,
+	      len);
+
+	col = host->col_addr;
+
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	n = mtd->writesize + mtd->oobsize - col;
+	n = min(len, n);
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "%s:%d: col = %d, n = %d\n", __func__, __LINE__, col, n);
+
+	while (n) {
+		void __iomem *p;
+
+		if (col < mtd->writesize)
+			p = host->regs + MAIN_AREA0 + (col & ~3);
+		else
+			p = host->regs + SPARE_AREA0 -
+						mtd->writesize + (col & ~3);
+
+		DEBUG(MTD_DEBUG_LEVEL3, "%s:%d: p = %p\n", __func__,
+		      __LINE__, p);
+
+		if (((col | (unsigned long)&buf[i]) & 3) || n < 16) {
+			uint32_t data = 0;
+
+			if (col & 3 || n < 4)
+				data = readl(p);
+
+			switch (col & 3) {
+			case 0:
+				if (n) {
+					data = (data & 0xffffff00) |
+					    (buf[i++] << 0);
+					n--;
+					col++;
+				}	/* fall through */
+			case 1:
+				if (n) {
+					data = (data & 0xffff00ff) |
+					    (buf[i++] << 8);
+					n--;
+					col++;
+				}	/* fall through */
+			case 2:
+				if (n) {
+					data = (data & 0xff00ffff) |
+					    (buf[i++] << 16);
+					n--;
+					col++;
+				}	/* fall through */
+			case 3:
+				if (n) {
+					data = (data & 0x00ffffff) |
+					    (buf[i++] << 24);
+					n--;
+					col++;
+				}
+			}
+
+			writel(data, p);
+		} else {
+			int m = mtd->writesize - col;
+
+			if (col >= mtd->writesize)
+				m += mtd->oobsize;
+
+			m = min(n, m) & ~3;	/* whole 32-bit words only */
+
+			DEBUG(MTD_DEBUG_LEVEL3,
+			      "%s:%d: n = %d, m = %d, i = %d, col = %d\n",
+			      __func__,  __LINE__, n, m, i, col);
+
+			memcpy(p, &buf[i], m);
+			col += m;
+			i += m;
+			n -= m;
+		}
+	}
+	/* Update saved column address */
+	host->col_addr = col;
+}
+
+/* Copy up to len bytes from the NANDFC RAM buffer into buf, starting at the
+ * saved column address (moved into the spare area when spare_only is set).
+ * The copy is clamped to the end of page + OOB; col_addr is advanced.
+ */
+static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int n, col, i = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_read_buf(col = %d, len = %d)\n", host->col_addr, len);
+
+	col = host->col_addr;
+
+	/* Adjust saved column address */
+	if (col < mtd->writesize && host->spare_only)
+		col += mtd->writesize;
+
+	n = mtd->writesize + mtd->oobsize - col;
+	n = min(len, n);
+
+	while (n) {
+		void __iomem *p;
+
+		if (col < mtd->writesize)
+			p = host->regs + MAIN_AREA0 + (col & ~3);
+		else
+			p = host->regs + SPARE_AREA0 -
+					mtd->writesize + (col & ~3);
+
+		if (((col | (unsigned long)&buf[i]) & 3) || n < 16) {
+			uint32_t data;
+
+			data = readl(p);
+			switch (col & 3) {
+			case 0:
+				if (n) {
+					buf[i++] = (uint8_t) (data);
+					n--;
+					col++;
+				}	/* fall through */
+			case 1:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 8);
+					n--;
+					col++;
+				}	/* fall through */
+			case 2:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 16);
+					n--;
+					col++;
+				}	/* fall through */
+			case 3:
+				if (n) {
+					buf[i++] = (uint8_t) (data >> 24);
+					n--;
+					col++;
+				}
+			}
+		} else {
+			int m = mtd->writesize - col;
+
+			if (col >= mtd->writesize)
+				m += mtd->oobsize;
+
+			m = min(n, m) & ~3;	/* whole 32-bit words only */
+			memcpy(&buf[i], p, m);
+			col += m;
+			i += m;
+			n -= m;
+		}
+	}
+	/* Update saved column address */
+	host->col_addr = col;
+
+}
+
+/* verify_buf hook for the upper layer. Verification is not implemented:
+ * no data is compared and -EFAULT is always returned. */
+static int mxc_nand_verify_buf(struct mtd_info *mtd,
+				const u_char *buf, int len)
+{
+	return -EFAULT;
+}
+
+/* Select (chip == 0) or deselect (chip == -1) the NAND chip. The NFC clock
+ * is enabled on select and disabled on deselect, tracked in clk_act. */
+static void mxc_nand_select_chip(struct mtd_info *mtd, int chip)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+
+#ifdef CONFIG_MTD_NAND_MXC_FORCE_CE
+	if (chip > 0) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "ERROR:  Illegal chip select (chip = %d)\n", chip);
+		return;
+	}
+
+	if (chip == -1) {
+		writew(readw(host->regs + NFC_CONFIG1) & ~NFC_CE,
+				host->regs + NFC_CONFIG1);
+		return;
+	}
+
+	writew(readw(host->regs + NFC_CONFIG1) | NFC_CE,
+			host->regs + NFC_CONFIG1);
+#endif
+
+	switch (chip) {
+	case -1:
+		/* Disable the NFC clock */
+		if (host->clk_act) {
+			clk_disable(host->clk);
+			host->clk_act = 0;
+		}
+		break;
+	case 0:
+		/* Enable the NFC clock */
+		if (!host->clk_act) {
+			clk_enable(host->clk);
+			host->clk_act = 1;
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+/* cmdfunc hook: issue a command to the NAND flash through the NFC, followed
+ * by the column and page address cycles when they are not -1. */
+static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
+				int column, int page_addr)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	int useirq = true;
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+	      "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
+	      command, column, page_addr);
+
+	/* Reset command state information */
+	host->status_request = false;
+
+	/* Command pre-processing step */
+	switch (command) {
+
+	case NAND_CMD_STATUS:
+		host->col_addr = 0;
+		host->status_request = true;
+		break;
+
+	case NAND_CMD_READ0:
+		host->col_addr = column;
+		host->spare_only = false;
+		useirq = false;
+		break;
+
+	case NAND_CMD_READOOB:
+		host->col_addr = column;
+		host->spare_only = true;
+		useirq = false;
+		if (host->pagesize_2k)
+			command = NAND_CMD_READ0; /* only READ0 is valid */
+		break;
+
+	case NAND_CMD_SEQIN:
+		if (column >= mtd->writesize) {
+			/*
+			 * FIXME: before send SEQIN command for write OOB,
+			 * We must read one page out.
+			 * For K9F1GXX has no READ1 command to set current HW
+			 * pointer to spare area, we must write the whole page
+			 * including OOB together.
+			 */
+			if (host->pagesize_2k)
+				/* call ourselves to read a page */
+				mxc_nand_command(mtd, NAND_CMD_READ0, 0,
+						page_addr);
+
+			host->col_addr = column - mtd->writesize;
+			host->spare_only = true;
+
+			/* Set program pointer to spare region */
+			if (!host->pagesize_2k)
+				send_cmd(host, NAND_CMD_READOOB, false);
+		} else {
+			host->spare_only = false;
+			host->col_addr = column;
+
+			/* Set program pointer to page start */
+			if (!host->pagesize_2k)
+				send_cmd(host, NAND_CMD_READ0, false);
+		}
+		useirq = false;
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		send_prog_page(host, 0, host->spare_only);
+
+		if (host->pagesize_2k) {
+			/* a 2k page also uses RAM buffers 1 to 3 */
+			send_prog_page(host, 1, host->spare_only);
+			send_prog_page(host, 2, host->spare_only);
+			send_prog_page(host, 3, host->spare_only);
+		}
+
+		break;
+
+	case NAND_CMD_ERASE1:
+		useirq = false;
+		break;
+	}
+
+	/* Write out the command to the device. */
+	send_cmd(host, command, useirq);
+
+	/* Write out column address, if necessary */
+	if (column != -1) {
+		/*
+		 * MXC NANDFC can only perform full page+spare or
+		 * spare-only read/write.  When the upper layers
+		 * perform a read/write buf operation,
+		 * we will use the saved column address to index into
+		 * the full page.
+		 */
+		send_addr(host, 0, page_addr == -1);
+		if (host->pagesize_2k)
+			/* another col addr cycle for 2k page */
+			send_addr(host, 0, false);
+	}
+
+	/* Write out page address, if necessary */
+	if (page_addr != -1) {
+		/* paddr_0 - paddr_7 */
+		send_addr(host, (page_addr & 0xff), false);
+
+		if (host->pagesize_2k) {
+			send_addr(host, (page_addr >> 8) & 0xFF, false);
+			if (mtd->size >= 0x10000000)	/* > 64K pages of 2k */
+				send_addr(host, (page_addr >> 16) & 0xff, true);
+		} else {
+			/* One more address cycle for higher density devices */
+			if (mtd->size >= 0x4000000) {
+				/* paddr_8 - paddr_15 */
+				send_addr(host, (page_addr >> 8) & 0xff, false);
+				send_addr(host, (page_addr >> 16) & 0xff, true);
+			} else
+				/* paddr_8 - paddr_15 */
+				send_addr(host, (page_addr >> 8) & 0xff, true);
+		}
+	}
+
+	/* Command post-processing step */
+	switch (command) {
+
+	case NAND_CMD_RESET:
+		break;
+
+	case NAND_CMD_READOOB:
+	case NAND_CMD_READ0:
+		if (host->pagesize_2k) {
+			/* send read confirm command */
+			send_cmd(host, NAND_CMD_READSTART, true);
+			/* read for each AREA */
+			send_read_page(host, 0, host->spare_only);
+			send_read_page(host, 1, host->spare_only);
+			send_read_page(host, 2, host->spare_only);
+			send_read_page(host, 3, host->spare_only);
+		} else
+			send_read_page(host, 0, host->spare_only);
+		break;
+
+	case NAND_CMD_READID:
+		send_read_id(host);
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		break;
+
+	case NAND_CMD_STATUS:
+		break;
+
+	case NAND_CMD_ERASE2:
+		break;
+	}
+}
+
+/* Probe: set up the host, map the NFC, scan for a chip and register it. */
+static int __init mxcnd_probe(struct platform_device *pdev)
+{
+	struct nand_chip *this;
+	struct mtd_info *mtd;
+	struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct mxc_nand_host *host;
+	struct resource *res;
+	uint16_t tmp;
+	int err = 0, nr_parts = 0;
+
+	/* Allocate memory for MTD device structure and private data */
+	host = kzalloc(sizeof(struct mxc_nand_host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->dev = &pdev->dev;
+	/* structures must be linked */
+	this = &host->nand;
+	mtd = &host->mtd;
+	mtd->priv = this;
+	mtd->owner = THIS_MODULE;
+
+	/* 5 us command delay time */
+	this->chip_delay = 5;
+
+	this->priv = host;
+	this->dev_ready = mxc_nand_dev_ready;
+	this->cmdfunc = mxc_nand_command;
+	this->select_chip = mxc_nand_select_chip;
+	this->read_byte = mxc_nand_read_byte;
+	this->read_word = mxc_nand_read_word;
+	this->write_buf = mxc_nand_write_buf;
+	this->read_buf = mxc_nand_read_buf;
+	this->verify_buf = mxc_nand_verify_buf;
+
+	host->clk = clk_get(&pdev->dev, "nfc_clk");
+	if (IS_ERR(host->clk)) {
+		err = PTR_ERR(host->clk);	/* err is still 0 here otherwise */
+		goto eclk;
+	}
+
+	clk_enable(host->clk);
+	host->clk_act = 1;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -ENODEV;
+		goto eres;
+	}
+
+	host->regs = ioremap(res->start, res->end - res->start + 1);
+	if (!host->regs) {
+		err = -EIO;
+		goto eres;
+	}
+
+	tmp = readw(host->regs + NFC_CONFIG1);
+	tmp |= NFC_INT_MSK;
+	writew(tmp, host->regs + NFC_CONFIG1);
+
+	init_waitqueue_head(&host->irq_waitq);
+
+	host->irq = platform_get_irq(pdev, 0);
+
+	err = request_irq(host->irq, mxc_nfc_irq, 0, "mxc_nd", host);
+	if (err)
+		goto eirq;
+
+	this->ecc.size = 512;
+	this->ecc.bytes = 3;
+	this->ecc.layout = &nand_hw_eccoob_8;
+	if (pdata->hw_ecc) {
+		this->ecc.calculate = mxc_nand_calculate_ecc;
+		this->ecc.hwctl = mxc_nand_enable_hwecc;
+		this->ecc.correct = mxc_nand_correct_data;
+		this->ecc.mode = NAND_ECC_HW;
+		tmp = readw(host->regs + NFC_CONFIG1);
+		tmp |= NFC_ECC_EN;
+		writew(tmp, host->regs + NFC_CONFIG1);
+	} else {
+		this->ecc.mode = NAND_ECC_SOFT;
+		tmp = readw(host->regs + NFC_CONFIG1);
+		tmp &= ~NFC_ECC_EN;
+		writew(tmp, host->regs + NFC_CONFIG1);
+	}
+
+	/* Reset NAND */
+	this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+	/* Preset operation: unlock the internal RAM buffer */
+	writew(0x2, host->regs + NFC_CONFIG);
+
+	/* Blocks to be unlocked */
+	writew(0x0, host->regs + NFC_UNLOCKSTART_BLKADDR);
+	writew(0x4000, host->regs + NFC_UNLOCKEND_BLKADDR);
+
+	/* Unlock Block Command for given address range */
+	writew(0x4, host->regs + NFC_WRPROT);
+
+	/* NAND bus width determines access functions used by upper layer */
+	if (pdata->width == 2) {
+		this->options |= NAND_BUSWIDTH_16;
+		this->ecc.layout = &nand_hw_eccoob_16;
+	}
+
+	host->pagesize_2k = 0;
+
+	/* Scan to find existence of the device */
+	if (nand_scan(mtd, 1)) {
+		DEBUG(MTD_DEBUG_LEVEL0,
+		      "MXC_ND: Unable to find any NAND device.\n");
+		err = -ENXIO;
+		goto escan;
+	}
+
+	/* Register the partitions */
+#ifdef CONFIG_MTD_PARTITIONS
+	nr_parts = parse_mtd_partitions(mtd, part_probes, &host->parts, 0);
+	if (nr_parts > 0)
+		add_mtd_partitions(mtd, host->parts, nr_parts);
+	else
+#endif
+	{
+		pr_info("Registering %s as whole device\n", mtd->name);
+		add_mtd_device(mtd);
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	return 0;
+
+escan:
+	free_irq(host->irq, host);	/* dev_id must match request_irq() */
+eirq:
+	iounmap(host->regs);
+eres:
+	if (host->clk_act)	/* select_chip(-1) may already have gated it */
+		clk_disable(host->clk);
+	clk_put(host->clk);
+eclk:
+	kfree(host);
+
+	return err;
+}
+
+static int __devexit mxcnd_remove(struct platform_device *pdev)
+{
+	struct mxc_nand_host *host = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+
+	/* Unregister the MTD device before tearing down what it uses */
+	nand_release(&host->mtd);
+	/* dev_id must be the one given to request_irq() in mxcnd_probe() */
+	free_irq(host->irq, host);
+	iounmap(host->regs);
+	clk_put(host->clk);
+	kfree(host);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct mxc_nand_host *host = platform_get_drvdata(pdev);
+	int ret = 0;
+
+	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
+	if (host) {	/* drvdata is the host, as stored by mxcnd_probe() */
+		ret = host->mtd.suspend(&host->mtd);
+		/* Gate the NFC clock only if select_chip left it running */
+		if (host->clk_act)
+			clk_disable(host->clk);
+	}
+	return ret;
+}
+
+static int mxcnd_resume(struct platform_device *pdev)
+{
+	struct mxc_nand_host *host = platform_get_drvdata(pdev);
+
+	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
+	if (host) {	/* drvdata is the host, as stored by mxcnd_probe() */
+		/* clk_act is unchanged by suspend: ungate iff it was gated */
+		if (host->clk_act)
+			clk_enable(host->clk);
+		host->mtd.resume(&host->mtd);
+	}
+
+	return 0;
+}
+
+#else
+# define mxcnd_suspend   NULL
+# define mxcnd_resume    NULL
+#endif				/* CONFIG_PM */
+
+static struct platform_driver mxcnd_driver = {	/* probe: see mxc_nd_init() */
+	.driver = {
+		   .name = DRIVER_NAME,
+		   },
+	.remove = __devexit_p(mxcnd_remove),	/* mxcnd_remove is __devexit */
+	.suspend = mxcnd_suspend,
+	.resume = mxcnd_resume,
+};
+
+static int __init mxc_nd_init(void)
+{
+	/* Register the driver with mxcnd_probe as its probe; -ENODEV on failure */
+	pr_info("MXC MTD nand Driver\n");
+	if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
+		printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void __exit mxc_nd_cleanup(void)
+{
+	/* Unregister the platform driver registered in mxc_nd_init() */
+	platform_driver_unregister(&mxcnd_driver);
+}
+
+module_init(mxc_nd_init);
+module_exit(mxc_nd_cleanup);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MXC NAND MTD driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d1129bae6c2..0a9c9cd33f9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
  * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
- * @dataofs	offset of requested data within the page
- * @readlen	data length
- * @buf:	buffer to store read data
+ * @data_offs:	offset of requested data within the page
+ * @readlen:	data length
+ * @bufpoi:	buffer to store read data
  */
 static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi)
 {
@@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* Grab the lock and see if the device is available */
 	nand_get_device(chip, mtd, FL_ERASING);
@@ -2318,6 +2318,12 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	/* Select the device */
 	chip->select_chip(mtd, 0);
 
+	/*
+	 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
+	 * after power-up
+	 */
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
 	/* Send the command for reading device ID */
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
@@ -2488,6 +2494,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	/* Check for a chip array */
 	for (i = 1; i < maxchips; i++) {
 		chip->select_chip(mtd, i);
+		/* See comment in nand_get_flash_type for reset */
+		chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
 		/* Send the command for reading device ID */
 		chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 		/* Read manufacturer and device IDs */
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 918a806a847..868147acce2 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -1,13 +1,18 @@
 /*
- * This file contains an ECC algorithm from Toshiba that detects and
- * corrects 1 bit errors in a 256 byte block of data.
+ * This file contains an ECC algorithm that detects and corrects 1 bit
+ * errors in a 256 byte block of data.
  *
  * drivers/mtd/nand/nand_ecc.c
  *
- * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
- *                         Toshiba America Electronics Components, Inc.
+ * Copyright © 2008 Koninklijke Philips Electronics NV.
+ *                  Author: Frans Meulenbroeks
  *
- * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ * Completely replaces the previous ECC implementation which was written by:
+ *   Steven J. Hill (sjhill@realitydiluted.com)
+ *   Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Information on how this algorithm works and how it was developed
+ * can be found in Documentation/mtd/nand_ecc.txt
  *
  * This file is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -23,174 +28,475 @@
  * with this file; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  *
- * As a special exception, if other files instantiate templates or use
- * macros or inline functions from these files, or you compile these
- * files and link them with other works to produce a work based on these
- * files, these files do not by themselves cause the resulting work to be
- * covered by the GNU General Public License. However the source code for
- * these files must still be made available in accordance with section (3)
- * of the GNU General Public License.
- *
- * This exception does not invalidate any other reasons why a work based on
- * this file might be covered by the GNU General Public License.
  */
 
+/*
+ * The STANDALONE macro is useful when running the code outside the kernel
+ * e.g. when running the code in a testbed or a benchmark program.
+ * When STANDALONE is used, the module related macros are commented out
+ * as well as the linux include files.
+ * Instead a private definition of mtd_info is given to satisfy the compiler
+ * (the code does not use mtd_info, so the code does not care)
+ */
+#ifndef STANDALONE
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
+#include <asm/byteorder.h>
+#else
+#include <stdint.h>
+struct mtd_info;
+#define EXPORT_SYMBOL(x)  /* x */
+
+#define MODULE_LICENSE(x)	/* x */
+#define MODULE_AUTHOR(x)	/* x */
+#define MODULE_DESCRIPTION(x)	/* x */
+
+#define printk printf
+#define KERN_ERR		""
+#endif
+
+/*
+ * invparity is a 256 byte table that contains the odd parity
+ * for each byte. So if the number of set bits in a byte is even,
+ * the array element is 1, and when the number of set bits is odd
+ * the array element is 0.
+ */
+static const char invparity[256] = {
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+/*
+ * bitsperbyte contains the number of set bits for each byte value;
+ * this is only used for testing and repairing parity
+ * (a precalculated value slightly improves performance)
+ */
+static const char bitsperbyte[256] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
 
 /*
- * Pre-calculated 256-way 1 byte column parity
+ * addressbits is a lookup table to filter out the bits from the xor-ed
+ * ecc data that identify the faulty location.
+ * this is only used for repairing parity
+ * see the comments in nand_correct_data for more details
  */
-static const u_char nand_ecc_precalc_table[] = {
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
+static const char addressbits[256] = {
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *			 block
  * @mtd:	MTD block structure
- * @dat:	raw data
- * @ecc_code:	buffer for ECC
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-		       u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+		       unsigned char *code)
 {
-	uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
 	int i;
+	const uint32_t *bp = (uint32_t *)buf;
+	/* 256 or 512 bytes/ecc  */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+	uint32_t cur;		/* current value in buffer */
+	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
+	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
+	uint32_t uninitialized_var(rp17);	/* to make compiler happy */
+	uint32_t par;		/* the cumulative parity for all data */
+	uint32_t tmppar;	/* the cumulative parity for this iteration;
+				   for rp12, rp14 and rp16 at the end of the
+				   loop */
+
+	par = 0;
+	rp4 = 0;
+	rp6 = 0;
+	rp8 = 0;
+	rp10 = 0;
+	rp12 = 0;
+	rp14 = 0;
+	rp16 = 0;
+
+	/*
+	 * The loop is unrolled a number of times;
+	 * This avoids if statements to decide on which rp value to update
+	 * Also we process the data by longwords.
+	 * Note: passing unaligned data might give a performance penalty.
+	 * It is assumed that the buffers are aligned.
+	 * tmppar is the cumulative sum of this iteration.
+	 * needed for calculating rp12, rp14, rp16 and par
+	 * also used as a performance improvement for rp6, rp8 and rp10
+	 */
+	for (i = 0; i < eccsize_mult << 2; i++) {
+		cur = *bp++;
+		tmppar = cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= tmppar;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= tmppar;
 
-	/* Initialize variables */
-	reg1 = reg2 = reg3 = 0;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp10 ^= tmppar;
 
-	/* Build up column parity */
-	for(i = 0; i < 256; i++) {
-		/* Get CP0 - CP5 from table */
-		idx = nand_ecc_precalc_table[*dat++];
-		reg1 ^= (idx & 0x3f);
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= cur;
 
-		/* All bit XOR = 1 ? */
-		if (idx & 0x40) {
-			reg3 ^= (uint8_t) i;
-			reg2 ^= ~((uint8_t) i);
-		}
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+
+		par ^= tmppar;
+		if ((i & 0x1) == 0)
+			rp12 ^= tmppar;
+		if ((i & 0x2) == 0)
+			rp14 ^= tmppar;
+		if (eccsize_mult == 2 && (i & 0x4) == 0)
+			rp16 ^= tmppar;
 	}
 
-	/* Create non-inverted ECC code from line parity */
-	tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
-	tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
-	tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
-	tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
-	tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
-	tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
-	tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
-	tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
-
-	tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
-	tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
-	tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
-	tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
-	tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
-	tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
-	tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
-	tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
-
-	/* Calculate final ECC code */
-#ifdef CONFIG_MTD_NAND_ECC_SMC
-	ecc_code[0] = ~tmp2;
-	ecc_code[1] = ~tmp1;
+	/*
+	 * Handle the fact that we use longword operations:
+	 * we'll bring rp4..rp14..rp16 back to single byte entities by
+	 * shifting and xoring; first fold the upper and lower 16 bits,
+	 * then the upper and lower 8 bits.
+	 */
+	rp4 ^= (rp4 >> 16);
+	rp4 ^= (rp4 >> 8);
+	rp4 &= 0xff;
+	rp6 ^= (rp6 >> 16);
+	rp6 ^= (rp6 >> 8);
+	rp6 &= 0xff;
+	rp8 ^= (rp8 >> 16);
+	rp8 ^= (rp8 >> 8);
+	rp8 &= 0xff;
+	rp10 ^= (rp10 >> 16);
+	rp10 ^= (rp10 >> 8);
+	rp10 &= 0xff;
+	rp12 ^= (rp12 >> 16);
+	rp12 ^= (rp12 >> 8);
+	rp12 &= 0xff;
+	rp14 ^= (rp14 >> 16);
+	rp14 ^= (rp14 >> 8);
+	rp14 &= 0xff;
+	if (eccsize_mult == 2) {
+		rp16 ^= (rp16 >> 16);
+		rp16 ^= (rp16 >> 8);
+		rp16 &= 0xff;
+	}
+
+	/*
+	 * we also need to calculate the row parity for rp0..rp3
+	 * This is present in par, because par is now
+	 * rp3 rp3 rp2 rp2 in little endian and
+	 * rp2 rp2 rp3 rp3 in big endian
+	 * as well as
+	 * rp1 rp0 rp1 rp0 in little endian and
+	 * rp0 rp1 rp0 rp1 in big endian
+	 * First calculate rp2 and rp3
+	 */
+#ifdef __BIG_ENDIAN
+	rp2 = (par >> 16);
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
+	rp3 = par & 0xffff;
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
 #else
-	ecc_code[0] = ~tmp1;
-	ecc_code[1] = ~tmp2;
+	rp3 = (par >> 16);
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
+	rp2 = par & 0xffff;
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
 #endif
-	ecc_code[2] = ((~reg1) << 2) | 0x03;
 
-	return 0;
-}
-EXPORT_SYMBOL(nand_calculate_ecc);
+	/* reduce par to 16 bits then calculate rp1 and rp0 */
+	par ^= (par >> 16);
+#ifdef __BIG_ENDIAN
+	rp0 = (par >> 8) & 0xff;
+	rp1 = (par & 0xff);
+#else
+	rp1 = (par >> 8) & 0xff;
+	rp0 = (par & 0xff);
+#endif
 
-static inline int countbits(uint32_t byte)
-{
-	int res = 0;
+	/* finally reduce par to 8 bits */
+	par ^= (par >> 8);
+	par &= 0xff;
 
-	for (;byte; byte >>= 1)
-		res += byte & 0x01;
-	return res;
+	/*
+	 * and calculate rp5..rp15..rp17
+	 * note that par = rp4 ^ rp5 and, because xor-ing twice with the
+	 * same value cancels out, we can say:
+	 * rp5 = (par ^ rp4);
+	 * The & 0xff seems superfluous, but benchmarking showed that
+	 * leaving it out gives slightly worse results. No idea why, probably
+	 * it has to do with the way the pipeline in pentium is organized.
+	 */
+	rp5 = (par ^ rp4) & 0xff;
+	rp7 = (par ^ rp6) & 0xff;
+	rp9 = (par ^ rp8) & 0xff;
+	rp11 = (par ^ rp10) & 0xff;
+	rp13 = (par ^ rp12) & 0xff;
+	rp15 = (par ^ rp14) & 0xff;
+	if (eccsize_mult == 2)
+		rp17 = (par ^ rp16) & 0xff;
+
+	/*
+	 * Finally calculate the ecc bits.
+	 * Again here it might seem that there are performance optimisations
+	 * possible, but benchmarks showed that on the system this is developed
+	 * the code below is the fastest
+	 */
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+	code[0] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[1] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1)  |
+	    (invparity[rp8]);
+#else
+	code[1] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[0] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1)  |
+	    (invparity[rp8]);
+#endif
+	if (eccsize_mult == 1)
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    3;
+	else
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    (invparity[rp17] << 1) |
+		    (invparity[rp16] << 0);
+	return 0;
 }
+EXPORT_SYMBOL(nand_calculate_ecc);
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
  * @mtd:	MTD block structure
- * @dat:	raw data read from the chip
+ * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat,
-		      u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	uint8_t s0, s1, s2;
+	unsigned char b0, b1, b2, bit_addr;
+	unsigned int byte_addr;	/* 0..511: needs 9 bits for 512 byte blocks */
+	/* 256 or 512 bytes/ecc  */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
 
+	/*
+	 * b0 to b2 indicate which bit is faulty (if any)
+	 * we might need the xor result  more than once,
+	 * so keep them in a local var
+	 */
 #ifdef CONFIG_MTD_NAND_ECC_SMC
-	s0 = calc_ecc[0] ^ read_ecc[0];
-	s1 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[0] ^ calc_ecc[0];
+	b1 = read_ecc[1] ^ calc_ecc[1];
 #else
-	s1 = calc_ecc[0] ^ read_ecc[0];
-	s0 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[1] ^ calc_ecc[1];
+	b1 = read_ecc[0] ^ calc_ecc[0];
 #endif
-	if ((s0 | s1 | s2) == 0)
-		return 0;
-
-	/* Check for a single bit error */
-	if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
-	    ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
-	    ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+	b2 = read_ecc[2] ^ calc_ecc[2];
 
-		uint32_t byteoffs, bitnum;
+	/* all-zero syndrome: read and calculated ECC are identical */
 
-		byteoffs = (s1 << 0) & 0x80;
-		byteoffs |= (s1 << 1) & 0x40;
-		byteoffs |= (s1 << 2) & 0x20;
-		byteoffs |= (s1 << 3) & 0x10;
+	/* repeated if statements are slightly more efficient than switch ... */
+	/* ordered in order of likelihood */
 
-		byteoffs |= (s0 >> 4) & 0x08;
-		byteoffs |= (s0 >> 3) & 0x04;
-		byteoffs |= (s0 >> 2) & 0x02;
-		byteoffs |= (s0 >> 1) & 0x01;
-
-		bitnum = (s2 >> 5) & 0x04;
-		bitnum |= (s2 >> 4) & 0x02;
-		bitnum |= (s2 >> 3) & 0x01;
-
-		dat[byteoffs] ^= (1 << bitnum);
+	if ((b0 | b1 | b2) == 0)
+		return 0;	/* no error */
 
+	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
+	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
+	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
+	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
+		/* single bit error in the data: locate it and flip it back */
+		/*
+		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
+		 * byte, cp 5/3/1 indicate the faulty bit.
+		 * A lookup table (called addressbits) is used to filter
+		 * the bits from the byte they are in.
+		 * A marginal optimisation is possible by having three
+		 * different lookup tables.
+		 * One as we have now (for b0), one for b2
+		 * (that would avoid the >> 1), and one for b1 (with all values
+		 * << 4). However it was felt that introducing two more tables
+		 * hardly justify the gain.
+		 *
+		 * The b2 shift is there to get rid of the lowest two bits.
+		 * We could also do addressbits[b2] >> 1 but for the
+		 * performance it does not make any difference
+		 */
+		if (eccsize_mult == 1)
+			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+		else
+			byte_addr = (addressbits[b2 & 0x3] << 8) +
+				    (addressbits[b1] << 4) + addressbits[b0];
+		bit_addr = addressbits[b2 >> 2];
+		/* flip the bit */
+		buf[byte_addr] ^= (1 << bit_addr);
 		return 1;
-	}
 
-	if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
-		return 1;
+	}
+	/* count nr of bits; use table lookup, faster than calculating it */
+	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
+		return 1;	/* error in ecc data; no action needed */
 
-	return -EBADMSG;
+	printk(KERN_ERR "uncorrectable ECC error\n");
+	return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
+MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 MODULE_DESCRIPTION("Generic NAND ECC support");
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 556e8131ecd..ae7c57781a6 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -38,7 +38,6 @@
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/random.h>
-#include <asm/div64.h>
 
 /* Default simulator parameters values */
 #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE)  || \
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index a64ad15b8fd..c0fa9c9edf0 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -115,55 +115,11 @@ enum {
 	STATE_PIO_WRITING,
 };
 
-struct pxa3xx_nand_timing {
-	unsigned int	tCH;  /* Enable signal hold time */
-	unsigned int	tCS;  /* Enable signal setup time */
-	unsigned int	tWH;  /* ND_nWE high duration */
-	unsigned int	tWP;  /* ND_nWE pulse time */
-	unsigned int	tRH;  /* ND_nRE high duration */
-	unsigned int	tRP;  /* ND_nRE pulse width */
-	unsigned int	tR;   /* ND_nWE high to ND_nRE low for read */
-	unsigned int	tWHR; /* ND_nWE high to ND_nRE low for status read */
-	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
-};
-
-struct pxa3xx_nand_cmdset {
-	uint16_t	read1;
-	uint16_t	read2;
-	uint16_t	program;
-	uint16_t	read_status;
-	uint16_t	read_id;
-	uint16_t	erase;
-	uint16_t	reset;
-	uint16_t	lock;
-	uint16_t	unlock;
-	uint16_t	lock_status;
-};
-
-struct pxa3xx_nand_flash {
-	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
-	struct pxa3xx_nand_cmdset *cmdset;
-
-	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
-	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
-	uint32_t flash_width;	/* Width of Flash memory (DWIDTH_M) */
-	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
-	uint32_t num_blocks;	/* Number of physical blocks in Flash */
-	uint32_t chip_id;
-
-	/* NOTE: these are automatically calculated, do not define */
-	size_t		oob_size;
-	size_t		read_id_bytes;
-
-	unsigned int	col_addr_cycles;
-	unsigned int	row_addr_cycles;
-};
-
 struct pxa3xx_nand_info {
 	struct nand_chip	nand_chip;
 
 	struct platform_device	 *pdev;
-	struct pxa3xx_nand_flash *flash_info;
+	const struct pxa3xx_nand_flash *flash_info;
 
 	struct clk		*clk;
 	void __iomem		*mmio_base;
@@ -202,12 +158,20 @@ struct pxa3xx_nand_info {
 	uint32_t		ndcb0;
 	uint32_t		ndcb1;
 	uint32_t		ndcb2;
+
+	/* calculated from pxa3xx_nand_flash data */
+	size_t		oob_size;
+	size_t		read_id_bytes;
+
+	unsigned int	col_addr_cycles;
+	unsigned int	row_addr_cycles;
 };
 
 static int use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transfering to/from NAND HW");
 
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 static struct pxa3xx_nand_cmdset smallpage_cmdset = {
 	.read1		= 0x0000,
 	.read2		= 0x0050,
@@ -291,11 +255,35 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
 	.chip_id	= 0xb12c,
 };
 
+static struct pxa3xx_nand_timing stm2GbX16_timing = {
+	.tCH = 10,
+	.tCS = 35,
+	.tWH = 15,
+	.tWP = 25,
+	.tRH = 15,
+	.tRP = 25,
+	.tR = 25000,
+	.tWHR = 60,
+	.tAR = 10,
+};
+
+static struct pxa3xx_nand_flash stm2GbX16 = {
+	.timing = &stm2GbX16_timing,
+	.page_per_block = 64,
+	.page_size = 2048,
+	.flash_width = 16,
+	.dfc_width = 16,
+	.num_blocks = 2048,
+	.chip_id = 0xba20,
+};
+
 static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 	&samsung512MbX16,
 	&micron1GbX8,
 	&micron1GbX16,
+	&stm2GbX16,
 };
+#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
 
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
 #define NDTR0_tCS(c)	(min((c), 7) << 16)
@@ -312,7 +300,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 #define ns2cycle(ns, clk)	(int)(((ns) * (clk / 1000000) / 1000) + 1)
 
 static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
-				   struct pxa3xx_nand_timing *t)
+				   const struct pxa3xx_nand_timing *t)
 {
 	unsigned long nand_clk = clk_get_rate(info->clk);
 	uint32_t ndtr0, ndtr1;
@@ -354,8 +342,8 @@ static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
 static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 			uint16_t cmd, int column, int page_addr)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 
 	/* calculate data size */
 	switch (f->page_size) {
@@ -373,14 +361,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
 	info->ndcb2 = 0;
-	info->ndcb0 |= NDCB0_ADDR_CYC(f->row_addr_cycles + f->col_addr_cycles);
+	info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
 
-	if (f->col_addr_cycles == 2) {
+	if (info->col_addr_cycles == 2) {
 		/* large block, 2 cycles for column address
 		 * row address starts from 3rd cycle
 		 */
 		info->ndcb1 |= (page_addr << 16) | (column & 0xffff);
-		if (f->row_addr_cycles == 3)
+		if (info->row_addr_cycles == 3)
 			info->ndcb2 = (page_addr >> 16) & 0xff;
 	} else
 		/* small block, 1 cycles for column address
@@ -406,7 +394,7 @@ static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
 
 static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
 {
-	struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
 
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
@@ -641,8 +629,8 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 				int column, int page_addr)
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
-	struct pxa3xx_nand_flash *flash_info = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
+	const struct pxa3xx_nand_flash *flash_info = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
 	int ret;
 
 	info->use_dma = (use_dma) ? 1 : 0;
@@ -720,7 +708,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		info->use_dma = 0;	/* force PIO read */
 		info->buf_start = 0;
 		info->buf_count = (command == NAND_CMD_READID) ?
-				flash_info->read_id_bytes : 1;
+				info->read_id_bytes : 1;
 
 		if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
 				cmdset->read_id : cmdset->read_status))
@@ -861,8 +849,8 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 
 static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 	uint32_t ndcr;
 	uint8_t  id_buff[8];
 
@@ -891,7 +879,7 @@ fail_timeout:
 }
 
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
-				    struct pxa3xx_nand_flash *f)
+				    const struct pxa3xx_nand_flash *f)
 {
 	struct platform_device *pdev = info->pdev;
 	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -904,25 +892,25 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 		return -EINVAL;
 
 	/* calculate flash information */
-	f->oob_size = (f->page_size == 2048) ? 64 : 16;
-	f->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
+	info->oob_size = (f->page_size == 2048) ? 64 : 16;
+	info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
 	/* calculate addressing information */
-	f->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
+	info->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
 
 	if (f->num_blocks * f->page_per_block > 65536)
-		f->row_addr_cycles = 3;
+		info->row_addr_cycles = 3;
 	else
-		f->row_addr_cycles = 2;
+		info->row_addr_cycles = 2;
 
 	ndcr |= (pdata->enable_arbiter) ? NDCR_ND_ARB_EN : 0;
-	ndcr |= (f->col_addr_cycles == 2) ? NDCR_RA_START : 0;
+	ndcr |= (info->col_addr_cycles == 2) ? NDCR_RA_START : 0;
 	ndcr |= (f->page_per_block == 64) ? NDCR_PG_PER_BLK : 0;
 	ndcr |= (f->page_size == 2048) ? NDCR_PAGE_SZ : 0;
 	ndcr |= (f->flash_width == 16) ? NDCR_DWIDTH_M : 0;
 	ndcr |= (f->dfc_width == 16) ? NDCR_DWIDTH_C : 0;
 
-	ndcr |= NDCR_RD_ID_CNT(f->read_id_bytes);
+	ndcr |= NDCR_RD_ID_CNT(info->read_id_bytes);
 	ndcr |= NDCR_SPARE_EN; /* enable spare by default */
 
 	info->reg_ndcr = ndcr;
@@ -932,12 +920,27 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 	return 0;
 }
 
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
+				    const struct pxa3xx_nand_platform_data *pdata)
 {
-	struct pxa3xx_nand_flash *f;
-	uint32_t id;
+	const struct pxa3xx_nand_flash *f;
+	uint32_t id = -1;
 	int i;
 
+	for (i = 0; i<pdata->num_flash; ++i) {
+		f = pdata->flash + i;
+
+		if (pxa3xx_nand_config_flash(info, f))
+			continue;
+
+		if (__readid(info, &id))
+			continue;
+
+		if (id == f->chip_id)
+			return 0;
+	}
+
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 	for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
 
 		f = builtin_flash_types[i];
@@ -951,7 +954,11 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
 		if (id == f->chip_id)
 			return 0;
 	}
+#endif
 
+	dev_warn(&info->pdev->dev,
+		 "failed to detect configured nand flash; found %04x instead of\n",
+		 id);
 	return -ENODEV;
 }
 
@@ -1014,7 +1021,7 @@ static struct nand_ecclayout hw_largepage_ecclayout = {
 static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
 				 struct pxa3xx_nand_info *info)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
 	struct nand_chip *this = &info->nand_chip;
 
 	this->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16: 0;
@@ -1135,7 +1142,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
 		goto fail_free_buf;
 	}
 
-	ret = pxa3xx_nand_detect_flash(info);
+	ret = pxa3xx_nand_detect_flash(info, pdata);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to detect flash\n");
 		ret = -ENODEV;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
new file mode 100644
index 00000000000..821acb08ff1
--- /dev/null
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -0,0 +1,878 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ * Copyright © 2008 Atom Create Engineering Co., Ltd.
+ *
+ * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/sh_flctl.h>
+
+static struct nand_ecclayout flctl_4secc_oob_16 = {
+	.eccbytes = 10,
+	.eccpos = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+	.oobfree = {
+		{.offset = 12,
+		. length = 4} },
+};
+
+static struct nand_ecclayout flctl_4secc_oob_64 = {
+	.eccbytes = 10,
+	.eccpos = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57},
+	.oobfree = {
+		{.offset = 60,
+		. length = 4} },
+};
+
+static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
+
+static struct nand_bbt_descr flctl_4secc_smallpage = {
+	.options = NAND_BBT_SCAN2NDPAGE,
+	.offs = 11,
+	.len = 1,
+	.pattern = scan_ff_pattern,
+};
+
+static struct nand_bbt_descr flctl_4secc_largepage = {
+	.options = 0,
+	.offs = 58,
+	.len = 2,
+	.pattern = scan_ff_pattern,
+};
+
+static void empty_fifo(struct sh_flctl *flctl)
+{
+	writel(0x000c0000, FLINTDMACR(flctl));	/* FIFO Clear */
+	writel(0x00000000, FLINTDMACR(flctl));	/* Clear Error flags */
+}
+
+static void start_translation(struct sh_flctl *flctl)
+{
+	writeb(TRSTRT, FLTRCR(flctl));
+}
+
+static void wait_completion(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		if (readb(FLTRCR(flctl)) & TREND) {
+			writeb(0x0, FLTRCR(flctl));
+			return;
+		}
+		udelay(1);
+	}
+
+	printk(KERN_ERR "wait_completion(): Timeout occured \n");
+	writeb(0x0, FLTRCR(flctl));
+}
+
+static void set_addr(struct mtd_info *mtd, int column, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t addr = 0;
+
+	if (column == -1) {
+		addr = page_addr;	/* ERASE1: page address only */
+	} else if (page_addr != -1) {
+		/* SEQIN, READ0, etc.: pack column and page address bytes */
+		if (flctl->page_size) {
+			addr = column & 0x0FFF;
+			addr |= (page_addr & 0xff) << 16;
+			addr |= ((page_addr >> 8) & 0xff) << 24;
+			/* bigger than 128MB: 3rd page byte goes to FLADR2 */
+			if (flctl->rw_ADRCNT == ADRCNT2_E) {
+				uint32_t 	addr2;
+				addr2 = (page_addr >> 16) & 0xff;
+				writel(addr2, FLADR2(flctl));
+			}
+		} else {
+			addr = column;
+			addr |= (page_addr & 0xff) << 8;
+			addr |= ((page_addr >> 8) & 0xff) << 16;
+			addr |= ((page_addr >> 16) & 0xff) << 24;
+		}
+	}
+	writel(addr, FLADR(flctl));
+}
+
+static void wait_rfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		uint32_t val;
+		/* check FIFO */
+		val = readl(FLDTCNTR(flctl)) >> 16;
+		if (val & 0xFF)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n");
+}
+
+static void wait_wfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t len, timeout = LOOP_TIMEOUT_MAX;
+
+	while (timeout--) {
+		/* check FIFO */
+		len = (readl(FLDTCNTR(flctl)) >> 16) & 0xFF;
+		if (len >= 4)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n");
+}
+
+static int wait_recfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+	int checked[4];
+	void __iomem *ecc_reg[4];
+	int i;
+	uint32_t data, size;
+
+	memset(checked, 0, sizeof(checked));
+
+	while (timeout--) {
+		size = readl(FLDTCNTR(flctl)) >> 24;
+		if (size & 0xFF)
+			return 0;	/* success: count is non-zero */
+
+		if (readl(FL4ECCCR(flctl)) & _4ECCFA)
+			return 1;	/* _4ECCFA set: can't correct */
+
+		udelay(1);
+		if (!(readl(FL4ECCCR(flctl)) & _4ECCEND))
+			continue;
+
+		/* _4ECCEND set: patch done_buff from result regs 0..2 */
+		ecc_reg[0] = FL4ECCRESULT0(flctl);
+		ecc_reg[1] = FL4ECCRESULT1(flctl);
+		ecc_reg[2] = FL4ECCRESULT2(flctl);
+		ecc_reg[3] = FL4ECCRESULT3(flctl);	/* never read below */
+
+		for (i = 0; i < 3; i++) {
+			data = readl(ecc_reg[i]);
+			if (data != INIT_FL4ECCRESULT_VAL && !checked[i]) {
+				uint8_t org;
+				int index;
+
+				index = data >> 16;
+				org = flctl->done_buff[index];
+				flctl->done_buff[index] = org ^ (data & 0xFF);
+				checked[i] = 1;
+			}
+		}
+
+		writel(0, FL4ECCCR(flctl));
+	}
+
+	printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n");
+	return 1;	/* timeout */
+}
+
+static void wait_wecfifo_ready(struct sh_flctl *flctl)
+{
+	uint32_t timeout = LOOP_TIMEOUT_MAX;
+	uint32_t len;
+
+	while (timeout--) {
+		/* check FLECFIFO */
+		len = (readl(FLDTCNTR(flctl)) >> 24) & 0xFF;
+		if (len >= 4)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n");
+}
+
+static void read_datareg(struct sh_flctl *flctl, int offset)
+{
+	unsigned long data;
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+
+	wait_completion(flctl);
+
+	data = readl(FLDATAR(flctl));
+	*buf = le32_to_cpu(data);
+}
+
+static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+	int i, len_4align;
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+	void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+	len_4align = (rlen + 3) / 4;
+
+	for (i = 0; i < len_4align; i++) {
+		wait_rfifo_ready(flctl);
+		buf[i] = readl(fifo_addr);
+		buf[i] = be32_to_cpu(buf[i]);
+	}
+}
+
+static int read_ecfiforeg(struct sh_flctl *flctl, uint8_t *buff)
+{
+	int i;
+	unsigned long *ecc_buf = (unsigned long *)buff;
+	void *fifo_addr = (void *)FLECFIFO(flctl);
+
+	for (i = 0; i < 4; i++) {
+		if (wait_recfifo_ready(flctl))
+			return 1;
+		ecc_buf[i] = readl(fifo_addr);
+		ecc_buf[i] = be32_to_cpu(ecc_buf[i]);
+	}
+
+	return 0;
+}
+
+static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+	int i, len_4align;
+	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+	void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+	len_4align = (rlen + 3) / 4;
+	for (i = 0; i < len_4align; i++) {
+		wait_wfifo_ready(flctl);
+		writel(cpu_to_be32(data[i]), fifo_addr);
+	}
+}
+
+static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+	uint32_t flcmdcr_val, addr_len_bytes = 0;
+
+	/* Set SNAND bit if page size is 2048 bytes (page_size non-zero) */
+	if (flctl->page_size)
+		flcmncr_val |= SNAND_E;
+	else
+		flcmncr_val &= ~SNAND_E;
+
+	/* default FLCMDCR val */
+	flcmdcr_val = DOCMD1_E | DOADR_E;
+
+	/* Per-command FLCMDCR flags and address byte count */
+	switch (cmd) {
+	case NAND_CMD_ERASE1:
+		addr_len_bytes = flctl->erase_ADRCNT;
+		flcmdcr_val |= DOCMD2_E;
+		break;
+	case NAND_CMD_READ0:
+	case NAND_CMD_READOOB:
+		addr_len_bytes = flctl->rw_ADRCNT;
+		flcmdcr_val |= CDSRC_E;
+		break;
+	case NAND_CMD_SEQIN:
+		/* command byte is READ0, READ1 or READ00 (per original note) */
+		flcmdcr_val &= ~DOADR_E;	/* ONLY execute 1st cmd */
+		break;
+	case NAND_CMD_PAGEPROG:
+		addr_len_bytes = flctl->rw_ADRCNT;
+		flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
+		break;
+	case NAND_CMD_READID:
+		flcmncr_val &= ~SNAND_E;
+		addr_len_bytes = ADRCNT_1;
+		break;
+	case NAND_CMD_STATUS:
+	case NAND_CMD_RESET:
+		flcmncr_val &= ~SNAND_E;
+		flcmdcr_val &= ~(DOADR_E | DOSR_E);
+		break;
+	default:
+		break;
+	}
+
+	/* Set address bytes parameter */
+	flcmdcr_val |= addr_len_bytes;
+
+	/* Now actually write: FLCMNCR, then FLCMDCR, then FLCMCDR */
+	writel(flcmncr_val, FLCMNCR(flctl));
+	writel(flcmdcr_val, FLCMDCR(flctl));
+	writel(flcmcdr_val, FLCMCDR(flctl));
+}
+
+static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf)
+{
+	int i, eccsize = chip->ecc.size;
+	int eccsteps = chip->ecc.steps;
+	uint8_t *p = buf;
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+	/* hand the page to the caller through read_buf(), one step at a time */
+	for (i = 0; i < eccsteps; i++, p += eccsize)
+		chip->read_buf(mtd, p, eccsize);
+
+	/* flags are per sector (assumed == ECC step): count once, then clear */
+	for (i = 0; i < eccsteps; i++) {
+		if (flctl->hwecc_cant_correct[i])
+			mtd->ecc_stats.failed++;
+		flctl->hwecc_cant_correct[i] = 0;
+	}
+
+	return 0;
+}
+
+static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+				   const uint8_t *buf)
+{
+	/* feed the page to chip->write_buf() in ecc.size sized pieces */
+	const uint8_t *chunk = buf;
+	int remaining = chip->ecc.steps;
+	int step_len = chip->ecc.size;
+
+	for (; remaining; remaining--, chunk += step_len)
+		chip->write_buf(mtd, chunk, step_len);
+}
+
+static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int sector, page_sectors;
+
+	/*
+	 * one 512 byte sector per pass: 4 if page_size is set, else 1
+	 */
+	page_sectors = flctl->page_size ? 4 : 1;
+
+	writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE | _4ECCCORRECT,
+		 FLCMNCR(flctl));
+
+	set_cmd_regs(mtd, NAND_CMD_READ0,
+		(NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+	for (sector = 0; sector < page_sectors; sector++) {
+		int ret;
+
+		empty_fifo(flctl);
+		writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+		writel(page_addr << 2 | sector, FLADR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 512, 512 * sector);
+
+		ret = read_ecfiforeg(flctl,
+			&flctl->done_buff[mtd->writesize + 16 * sector]);
+
+		/* store the result either way so an old failure cannot stick */
+		flctl->hwecc_cant_correct[sector] = ret ? 1 : 0;
+
+		writel(0x0, FL4ECCCR(flctl));
+		wait_completion(flctl);
+	}
+	writel(readl(FLCMNCR(flctl)) & ~(ACM_SACCES_MODE | _4ECCCORRECT),
+			FLCMNCR(flctl));
+}
+
+static void execmd_read_oob(struct mtd_info *mtd, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+	set_cmd_regs(mtd, NAND_CMD_READ0,
+		(NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+	empty_fifo(flctl);
+	if (flctl->page_size) {
+		int i;
+		/* 2k page: 0xFF-fill done_buff[0..47], read 16 bytes to 48 */
+		for (i = 0; i < 16 * 3; i++)
+			flctl->done_buff[i] = 0xFF;
+
+		set_addr(mtd, 3 * 528 + 512, page_addr);
+		writel(16, FLDTCNTR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 16, 16 * 3);
+		wait_completion(flctl);
+	} else {
+		/* 512 byte page: read 16 bytes from column 512 to offset 0 */
+		set_addr(mtd, 512, page_addr);
+		writel(16, FLDTCNTR(flctl));
+
+		start_translation(flctl);
+		read_fiforeg(flctl, 16, 0);
+		wait_completion(flctl);
+	}
+}
+
+static void execmd_write_page_sector(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int i, page_addr = flctl->seqin_page_addr;
+	int sector, page_sectors;
+
+	if (flctl->page_size)
+		page_sectors = 4;
+	else
+		page_sectors = 1;
+
+	writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE, FLCMNCR(flctl));
+
+	set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+			(NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+	for (sector = 0; sector < page_sectors; sector++) {
+		empty_fifo(flctl);
+		writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+		writel(page_addr << 2 | sector, FLADR(flctl));
+
+		start_translation(flctl);
+		write_fiforeg(flctl, 512, 512 * sector);
+
+		for (i = 0; i < 4; i++) {
+			wait_wecfifo_ready(flctl); /* wait for write ready */
+			writel(0xFFFFFFFF, FLECFIFO(flctl));
+		}
+		wait_completion(flctl);
+	}
+
+	writel(readl(FLCMNCR(flctl)) & ~ACM_SACCES_MODE, FLCMNCR(flctl));
+}
+
+static void execmd_write_oob(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int page_addr = flctl->seqin_page_addr;
+	int sector, page_sectors;
+
+	if (flctl->page_size) {
+		sector = 3;
+		page_sectors = 4;
+	} else {
+		sector = 0;
+		page_sectors = 1;
+	}
+
+	set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+			(NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+	for (; sector < page_sectors; sector++) {
+		empty_fifo(flctl);
+		set_addr(mtd, sector * 528 + 512, page_addr);
+		writel(16, FLDTCNTR(flctl));	/* set write size */
+
+		start_translation(flctl);
+		write_fiforeg(flctl, 16, 16 * sector);
+		wait_completion(flctl);
+	}
+}
+
+static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
+			int column, int page_addr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t read_cmd = 0;
+
+	flctl->read_bytes = 0;
+	if (command != NAND_CMD_PAGEPROG)
+		flctl->index = 0;
+
+	switch (command) {
+	case NAND_CMD_READ1:
+	case NAND_CMD_READ0:
+		if (flctl->hwecc) {
+			/* read page with hwecc */
+			execmd_read_page_sector(mtd, page_addr);
+			break;
+		}
+		empty_fifo(flctl);
+		if (flctl->page_size)
+			set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+				| command);
+		else
+			set_cmd_regs(mtd, command, command);
+
+		set_addr(mtd, 0, page_addr);
+
+		flctl->read_bytes = mtd->writesize + mtd->oobsize;
+		flctl->index += column;
+		goto read_normal_exit;
+
+	case NAND_CMD_READOOB:
+		if (flctl->hwecc) {
+			/* read page with hwecc */
+			execmd_read_oob(mtd, page_addr);
+			break;
+		}
+
+		empty_fifo(flctl);
+		if (flctl->page_size) {
+			set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+				| NAND_CMD_READ0);
+			set_addr(mtd, mtd->writesize, page_addr);
+		} else {
+			set_cmd_regs(mtd, command, command);
+			set_addr(mtd, 0, page_addr);
+		}
+		flctl->read_bytes = mtd->oobsize;
+		goto read_normal_exit;
+
+	case NAND_CMD_READID:
+		empty_fifo(flctl);
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, 0, 0);
+
+		flctl->read_bytes = 4;
+		writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+		start_translation(flctl);
+		read_datareg(flctl, 0);	/* read and end */
+		break;
+
+	case NAND_CMD_ERASE1:
+		flctl->erase1_page_addr = page_addr;
+		break;
+
+	case NAND_CMD_ERASE2:
+		set_cmd_regs(mtd, NAND_CMD_ERASE1,
+			(command << 8) | NAND_CMD_ERASE1);
+		set_addr(mtd, -1, flctl->erase1_page_addr);
+		start_translation(flctl);
+		wait_completion(flctl);
+		break;
+
+	case NAND_CMD_SEQIN:
+		if (!flctl->page_size) {
+			/* output read command */
+			if (column >= mtd->writesize) {
+				column -= mtd->writesize;
+				read_cmd = NAND_CMD_READOOB;
+			} else if (column < 256) {
+				read_cmd = NAND_CMD_READ0;
+			} else {
+				column -= 256;
+				read_cmd = NAND_CMD_READ1;
+			}
+		}
+		flctl->seqin_column = column;
+		flctl->seqin_page_addr = page_addr;
+		flctl->seqin_read_cmd = read_cmd;
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		empty_fifo(flctl);
+		if (!flctl->page_size) {
+			set_cmd_regs(mtd, NAND_CMD_SEQIN,
+					flctl->seqin_read_cmd);
+			set_addr(mtd, -1, -1);
+			writel(0, FLDTCNTR(flctl));	/* set 0 size */
+			start_translation(flctl);
+			wait_completion(flctl);
+		}
+		if (flctl->hwecc) {
+			/* write page with hwecc */
+			if (flctl->seqin_column == mtd->writesize)
+				execmd_write_oob(mtd);
+			else if (!flctl->seqin_column)
+				execmd_write_page_sector(mtd);
+			else
+				printk(KERN_ERR "Invalid address !?\n");
+			break;
+		}
+		set_cmd_regs(mtd, command, (command << 8) | NAND_CMD_SEQIN);
+		set_addr(mtd, flctl->seqin_column, flctl->seqin_page_addr);
+		writel(flctl->index, FLDTCNTR(flctl));	/* set write size */
+		start_translation(flctl);
+		write_fiforeg(flctl, flctl->index, 0);
+		wait_completion(flctl);
+		break;
+
+	case NAND_CMD_STATUS:
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, -1, -1);
+
+		flctl->read_bytes = 1;
+		writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+		start_translation(flctl);
+		read_datareg(flctl, 0); /* read and end */
+		break;
+
+	case NAND_CMD_RESET:
+		set_cmd_regs(mtd, command, command);
+		set_addr(mtd, -1, -1);
+
+		writel(0, FLDTCNTR(flctl));	/* set 0 size */
+		start_translation(flctl);
+		wait_completion(flctl);
+		break;
+
+	default:
+		break;
+	}
+	return;
+
+read_normal_exit:
+	writel(flctl->read_bytes, FLDTCNTR(flctl));	/* set read size */
+	start_translation(flctl);
+	read_fiforeg(flctl, flctl->read_bytes, 0);
+	wait_completion(flctl);
+	return;
+}
+
+/* Assert CE0 for chip 0 and release it for -1; other chip numbers BUG(). */
+static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint32_t cmncr = readl(FLCMNCR(flctl));
+
+	if (chipnr == 0) {
+		cmncr |= CE0_ENABLE;
+	} else if (chipnr == -1) {
+		cmncr &= ~CE0_ENABLE;
+	} else {
+		/* only chip 0 and "deselect" (-1) are handled here */
+		BUG();
+		return;
+	}
+
+	writel(cmncr, FLCMNCR(flctl));
+}
+
+static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int i, index = flctl->index;
+
+	for (i = 0; i < len; i++)
+		flctl->done_buff[index + i] = buf[i];
+	flctl->index += len;
+}
+
+static uint8_t flctl_read_byte(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	int index = flctl->index;
+	uint8_t data;
+
+	data = flctl->done_buff[index];
+	flctl->index++;
+	return data;
+}
+
+/* Copy the next @len bytes of the driver's read buffer into @buf. */
+static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	/* flctl_read_byte() advances the buffer index on every call */
+	for (; len > 0; len--)
+		*buf++ = flctl_read_byte(mtd);
+}
+
+/* Compare @buf with the next @len buffered bytes; -EFAULT on first mismatch. */
+static int flctl_verify_buf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	for (; len > 0; len--, buf++) {
+		if (*buf != flctl_read_byte(mtd))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static void flctl_register_init(struct sh_flctl *flctl, unsigned long val)
+{
+	writel(val, FLCMNCR(flctl));
+}
+
+static int flctl_chip_init_tail(struct mtd_info *mtd)
+{
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct nand_chip *chip = &flctl->chip;
+
+	if (mtd->writesize == 512) {
+		flctl->page_size = 0;	/* small (512-byte) page */
+		if (chip->chipsize > (32 << 20)) {
+			/* bigger than 32MB */
+			flctl->rw_ADRCNT = ADRCNT_4;
+			flctl->erase_ADRCNT = ADRCNT_3;
+		} else if (chip->chipsize > (2 << 16)) {
+			/* bigger than 128KB */
+			flctl->rw_ADRCNT = ADRCNT_3;
+			flctl->erase_ADRCNT = ADRCNT_2;
+		} else {
+			flctl->rw_ADRCNT = ADRCNT_2;
+			flctl->erase_ADRCNT = ADRCNT_1;
+		}
+	} else {
+		flctl->page_size = 1;	/* any other page size */
+		if (chip->chipsize > (128 << 20)) {
+			/* bigger than 128MB */
+			flctl->rw_ADRCNT = ADRCNT2_E;
+			flctl->erase_ADRCNT = ADRCNT_3;
+		} else if (chip->chipsize > (8 << 16)) {
+			/* bigger than 512KB */
+			flctl->rw_ADRCNT = ADRCNT_4;
+			flctl->erase_ADRCNT = ADRCNT_2;
+		} else {
+			flctl->rw_ADRCNT = ADRCNT_3;
+			flctl->erase_ADRCNT = ADRCNT_1;
+		}
+	}
+
+	if (flctl->hwecc) {
+		if (mtd->writesize == 512) {
+			chip->ecc.layout = &flctl_4secc_oob_16;
+			chip->badblock_pattern = &flctl_4secc_smallpage;
+		} else {
+			chip->ecc.layout = &flctl_4secc_oob_64;
+			chip->badblock_pattern = &flctl_4secc_largepage;
+		}
+
+		chip->ecc.size = 512;
+		chip->ecc.bytes = 10;
+		chip->ecc.read_page = flctl_read_page_hwecc;
+		chip->ecc.write_page = flctl_write_page_hwecc;
+		chip->ecc.mode = NAND_ECC_HW;
+
+		/* enable 4-symbol ECC in the common control register */
+		writel(readl(FLCMNCR(flctl)) | _4ECCEN | ECCPOS2 | ECCPOS_02,
+				FLCMNCR(flctl));
+	} else {
+		chip->ecc.mode = NAND_ECC_SOFT;
+	}
+
+	return 0;
+}
+
+/* Map the FLCTL registers, scan the NAND chip and register its partitions. */
+static int __init flctl_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct sh_flctl *flctl;
+	struct mtd_info *flctl_mtd;
+	struct nand_chip *nand;
+	struct sh_flctl_platform_data *pdata;
+	int ret;
+
+	pdata = pdev->dev.platform_data;
+	if (pdata == NULL) {
+		printk(KERN_ERR "sh_flctl platform_data not found.\n");
+		return -ENODEV;
+	}
+
+	flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
+	if (!flctl) {
+		printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		printk(KERN_ERR "%s: resource not found.\n", __func__);
+		ret = -ENODEV;
+		goto err_free;
+	}
+
+	flctl->reg = ioremap(res->start, res->end - res->start + 1);
+	if (flctl->reg == NULL) {
+		printk(KERN_ERR "%s: ioremap error.\n", __func__);
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	platform_set_drvdata(pdev, flctl);
+	flctl_mtd = &flctl->mtd;
+	nand = &flctl->chip;
+	flctl_mtd->priv = nand;
+	flctl->hwecc = pdata->has_hwecc;
+
+	flctl_register_init(flctl, pdata->flcmncr_val);
+
+	nand->options = NAND_NO_AUTOINCR;
+	nand->chip_delay = 20;	/* 20 us command delay time */
+
+	/* Hook the FLCTL accessors into the NAND core */
+	nand->read_byte = flctl_read_byte;
+	nand->write_buf = flctl_write_buf;
+	nand->read_buf = flctl_read_buf;
+	nand->verify_buf = flctl_verify_buf;
+	nand->select_chip = flctl_select_chip;
+	nand->cmdfunc = flctl_cmdfunc;
+
+	ret = nand_scan_ident(flctl_mtd, 1);
+	if (ret)
+		goto err_unmap;
+
+	ret = flctl_chip_init_tail(flctl_mtd);
+	if (ret)
+		goto err_unmap;
+
+	ret = nand_scan_tail(flctl_mtd);
+	if (ret)
+		goto err_unmap;
+
+	add_mtd_partitions(flctl_mtd, pdata->parts, pdata->nr_parts);
+	return 0;
+
+err_unmap:
+	iounmap(flctl->reg);	/* undo ioremap() before freeing flctl */
+err_free:
+	kfree(flctl);
+	return ret;
+}
+
+static int __exit flctl_remove(struct platform_device *pdev)
+{
+	struct sh_flctl *flctl = platform_get_drvdata(pdev);
+
+	nand_release(&flctl->mtd);
+	iounmap(flctl->reg);	/* drop the mapping made by ioremap() in probe */
+	kfree(flctl);
+	return 0;
+}
+
+static struct platform_driver flctl_driver = {
+	.probe		= flctl_probe,
+	.remove		= flctl_remove,
+	.driver = {
+		.name	= "sh_flctl",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init flctl_nand_init(void)
+{
+	return platform_driver_register(&flctl_driver);
+}
+
+static void __exit flctl_nand_cleanup(void)
+{
+	platform_driver_unregister(&flctl_driver);
+}
+
+module_init(flctl_nand_init);
+module_exit(flctl_nand_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("SuperH FLCTL driver");
+MODULE_ALIAS("platform:sh_flctl");
diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c
deleted file mode 100644
index bbf492e6830..00000000000
--- a/drivers/mtd/nand/toto.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  drivers/mtd/nand/toto.c
- *
- *  Copyright (c) 2003 Texas Instruments
- *
- *  Derived from drivers/mtd/autcpu12.c
- *
- *  Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Overview:
- *   This is a device driver for the NAND flash device found on the
- *   TI fido board. It supports 32MiB and 64MiB cards
- */
-
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/arch/hardware.h>
-#include <asm/sizes.h>
-#include <asm/arch/toto.h>
-#include <asm/arch-omap1510/hardware.h>
-#include <asm/arch/gpio.h>
-
-#define CONFIG_NAND_WORKAROUND 1
-
-/*
- * MTD structure for TOTO board
- */
-static struct mtd_info *toto_mtd = NULL;
-
-static unsigned long toto_io_base = OMAP_FLASH_1_BASE;
-
-/*
- * Define partitions for flash devices
- */
-
-static struct mtd_partition partition_info64M[] = {
-	{ .name =	"toto kernel partition 1",
-	  .offset =	0,
-	  .size	=	2 * SZ_1M },
-	{ .name =	"toto file sys partition 2",
-	  .offset =	2 * SZ_1M,
-	  .size =	14 * SZ_1M },
-	{ .name =	"toto user partition 3",
-	  .offset =	16 * SZ_1M,
-	  .size =	16 * SZ_1M },
-	{ .name =	"toto devboard extra partition 4",
-	  .offset =	32 * SZ_1M,
-	  .size =	32 * SZ_1M },
-};
-
-static struct mtd_partition partition_info32M[] = {
-	{ .name =	"toto kernel partition 1",
-	  .offset =	0,
-	  .size =	2 * SZ_1M },
-	{ .name =	"toto file sys partition 2",
-	  .offset =	2 * SZ_1M,
-	  .size =	14 * SZ_1M },
-	{ .name =	"toto user partition 3",
-	  .offset =	16 * SZ_1M,
-	  .size =	16 * SZ_1M },
-};
-
-#define NUM_PARTITIONS32M 3
-#define NUM_PARTITIONS64M 4
-
-/*
- *	hardware specific access to control-lines
- *
- *	ctrl:
- *	NAND_NCE: bit 0 -> bit 14 (0x4000)
- *	NAND_CLE: bit 1 -> bit 12 (0x1000)
- *	NAND_ALE: bit 2 -> bit 1  (0x0002)
- */
-static void toto_hwcontrol(struct mtd_info *mtd, int cmd,
-			   unsigned int ctrl)
-{
-	struct nand_chip *chip = mtd->priv;
-
-	if (ctrl & NAND_CTRL_CHANGE) {
-		unsigned long bits;
-
-		/* hopefully enough time for tc make proceding write to clear */
-		udelay(1);
-
-		bits = (~ctrl & NAND_NCE) << 14;
-		bits |= (ctrl & NAND_CLE) << 12;
-		bits |= (ctrl & NAND_ALE) >> 1;
-
-#warning Wild guess as gpiosetout() is nowhere defined in the kernel source - tglx
-		gpiosetout(0x5002, bits);
-
-#ifdef CONFIG_NAND_WORKAROUND
-		/* "some" dev boards busted, blue wired to rts2 :( */
-		rts2setout(2, (ctrl & NAND_CLE) << 1);
-#endif
-		/* allow time to ensure gpio state to over take memory write */
-		udelay(1);
-	}
-
-	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, chip->IO_ADDR_W);
-}
-
-/*
- * Main initialization routine
- */
-static int __init toto_init(void)
-{
-	struct nand_chip *this;
-	int err = 0;
-
-	/* Allocate memory for MTD device structure and private data */
-	toto_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
-	if (!toto_mtd) {
-		printk(KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	/* Get pointer to private data */
-	this = (struct nand_chip *)(&toto_mtd[1]);
-
-	/* Initialize structures */
-	memset(toto_mtd, 0, sizeof(struct mtd_info));
-	memset(this, 0, sizeof(struct nand_chip));
-
-	/* Link the private data with the MTD structure */
-	toto_mtd->priv = this;
-	toto_mtd->owner = THIS_MODULE;
-
-	/* Set address of NAND IO lines */
-	this->IO_ADDR_R = toto_io_base;
-	this->IO_ADDR_W = toto_io_base;
-	this->cmd_ctrl = toto_hwcontrol;
-	this->dev_ready = NULL;
-	/* 25 us command delay time */
-	this->chip_delay = 30;
-	this->ecc.mode = NAND_ECC_SOFT;
-
-	/* Scan to find existance of the device */
-	if (nand_scan(toto_mtd, 1)) {
-		err = -ENXIO;
-		goto out_mtd;
-	}
-
-	/* Register the partitions */
-	switch (toto_mtd->size) {
-	case SZ_64M:
-		add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M);
-		break;
-	case SZ_32M:
-		add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M);
-		break;
-	default:{
-			printk(KERN_WARNING "Unsupported Nand device\n");
-			err = -ENXIO;
-			goto out_buf;
-		}
-	}
-
-	gpioreserve(NAND_MASK);	/* claim our gpios */
-	archflashwp(0, 0);	/* open up flash for writing */
-
-	goto out;
-
- out_mtd:
-	kfree(toto_mtd);
- out:
-	return err;
-}
-
-module_init(toto_init);
-
-/*
- * Clean up routine
- */
-static void __exit toto_cleanup(void)
-{
-	/* Release resources, unregister device */
-	nand_release(toto_mtd);
-
-	/* Free the MTD device structure */
-	kfree(toto_mtd);
-
-	/* stop flash writes */
-	archflashwp(0, 1);
-
-	/* release gpios to system */
-	gpiorelease(NAND_MASK);
-}
-
-module_exit(toto_cleanup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>");
-MODULE_DESCRIPTION("Glue layer for NAND flash on toto board");
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 4f80c2fd89a..9e45b3f39c0 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -20,7 +20,6 @@
 #include <linux/mtd/partitions.h>
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts)
 {
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index cb41cbca64f..79fa79e8f8d 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -27,8 +27,16 @@ config MTD_ONENAND_GENERIC
 	help
 	  Support for OneNAND flash via platform device driver.
 
+config MTD_ONENAND_OMAP2
+	tristate "OneNAND on OMAP2/OMAP3 support"
+	depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3)
+	help
+	  Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU
+	  via the GPMC memory controller.
+
 config MTD_ONENAND_OTP
 	bool "OneNAND OTP Support"
+	select HAVE_MTD_OTP
 	help
 	  One Block of the NAND Flash Array memory is reserved as
 	  a One-Time Programmable Block memory area.
diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile
index 4d2eacfd7e1..64b6cc61a52 100644
--- a/drivers/mtd/onenand/Makefile
+++ b/drivers/mtd/onenand/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND)		+= onenand.o
 
 # Board specific.
 obj-$(CONFIG_MTD_ONENAND_GENERIC)	+= generic.o
+obj-$(CONFIG_MTD_ONENAND_OMAP2)		+= omap2.o
 
 # Simulator
 obj-$(CONFIG_MTD_ONENAND_SIM)		+= onenand_sim.o
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
new file mode 100644
index 00000000000..8387e05daae
--- /dev/null
+++ b/drivers/mtd/onenand/omap2.c
@@ -0,0 +1,802 @@
+/*
+ *  linux/drivers/mtd/onenand/omap2.c
+ *
+ *  OneNAND driver for OMAP2 / OMAP3
+ *
+ *  Copyright © 2005-2006 Nokia Corporation
+ *
+ *  Author: Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha Yrjölä
+ *  IRQ and DMA support written by Timo Teras
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; see the file COPYING. If not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/onenand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/mach/flash.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/onenand.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/pm.h>
+
+#include <linux/dma-mapping.h>
+#include <asm/dma-mapping.h>
+#include <asm/arch/dma.h>
+
+#include <asm/arch/board.h>
+
+#define DRIVER_NAME "omap2-onenand"
+
+#define ONENAND_IO_SIZE		SZ_128K
+#define ONENAND_BUFRAM_SIZE	(1024 * 5)
+
+struct omap2_onenand {
+	struct platform_device *pdev;
+	int gpmc_cs;
+	unsigned long phys_base;
+	int gpio_irq;
+	struct mtd_info mtd;
+	struct mtd_partition *parts;
+	struct onenand_chip onenand;
+	struct completion irq_done;
+	struct completion dma_done;
+	int dma_channel;
+	int freq;
+	int (*setup)(void __iomem *base, int freq);
+};
+
+static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
+{
+	struct omap2_onenand *c = data;
+
+	complete(&c->dma_done);
+}
+
+static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id)
+{
+	struct omap2_onenand *c = dev_id;
+
+	complete(&c->irq_done);
+
+	return IRQ_HANDLED;
+}
+
+static inline unsigned short read_reg(struct omap2_onenand *c, int reg)
+{
+	return readw(c->onenand.base + reg);
+}
+
+static inline void write_reg(struct omap2_onenand *c, unsigned short value,
+			     int reg)
+{
+	writew(value, c->onenand.base + reg);
+}
+
+static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr)
+{
+	printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n",
+	       msg, state, ctrl, intr);
+}
+
+static void wait_warn(char *msg, int state, unsigned int ctrl,
+		      unsigned int intr)
+{
+	printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x "
+	       "intr 0x%04x\n", msg, state, ctrl, intr);
+}
+
+static int omap2_onenand_wait(struct mtd_info *mtd, int state)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	unsigned int intr = 0;
+	unsigned int ctrl;
+	unsigned long timeout;
+	u32 syscfg;
+
+	if (state == FL_RESETING) {
+		int i;
+
+		for (i = 0; i < 20; i++) {
+			udelay(1);
+			intr = read_reg(c, ONENAND_REG_INTERRUPT);
+			if (intr & ONENAND_INT_MASTER)
+				break;
+		}
+		ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+		if (ctrl & ONENAND_CTRL_ERROR) {
+			wait_err("controller error", state, ctrl, intr);
+			return -EIO;
+		}
+		if (!(intr & ONENAND_INT_RESET)) {
+			wait_err("timeout", state, ctrl, intr);
+			return -EIO;
+		}
+		return 0;
+	}
+
+	if (state != FL_READING) {
+		int result;
+
+		/* Turn interrupts on */
+		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) {
+			syscfg |= ONENAND_SYS_CFG1_IOBE;
+			write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+			if (cpu_is_omap34xx())
+				/* Add a delay to let GPIO settle */
+				syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		}
+
+		INIT_COMPLETION(c->irq_done);
+		if (c->gpio_irq) {
+			result = omap_get_gpio_datain(c->gpio_irq);
+			if (result == -1) {
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				wait_err("gpio error", state, ctrl, intr);
+				return -EIO;
+			}
+		} else
+			result = 0;
+		if (result == 0) {
+			int retry_cnt = 0;
+retry:
+			result = wait_for_completion_timeout(&c->irq_done,
+						    msecs_to_jiffies(20));
+			if (result == 0) {
+				/* Timeout after 20ms */
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				if (ctrl & ONENAND_CTRL_ONGO) {
+					/*
+					 * The operation seems to be still going
+					 * so give it some more time.
+					 */
+					retry_cnt += 1;
+					if (retry_cnt < 3)
+						goto retry;
+					intr = read_reg(c,
+							ONENAND_REG_INTERRUPT);
+					wait_err("timeout", state, ctrl, intr);
+					return -EIO;
+				}
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				if ((intr & ONENAND_INT_MASTER) == 0)
+					wait_warn("timeout", state, ctrl, intr);
+			}
+		}
+	} else {
+		int retry_cnt = 0;
+
+		/* Turn interrupts off */
+		syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+		syscfg &= ~ONENAND_SYS_CFG1_IOBE;
+		write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+
+		timeout = jiffies + msecs_to_jiffies(20);
+		while (1) {
+			if (time_before(jiffies, timeout)) {
+				intr = read_reg(c, ONENAND_REG_INTERRUPT);
+				if (intr & ONENAND_INT_MASTER)
+					break;
+			} else {
+				/* Timeout after 20ms */
+				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+				if (ctrl & ONENAND_CTRL_ONGO) {
+					/*
+					 * The operation seems to be still going
+					 * so give it some more time.
+					 */
+					retry_cnt += 1;
+					if (retry_cnt < 3) {
+						timeout = jiffies +
+							  msecs_to_jiffies(20);
+						continue;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+	intr = read_reg(c, ONENAND_REG_INTERRUPT);
+	ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+
+	if (intr & ONENAND_INT_READ) {
+		int ecc = read_reg(c, ONENAND_REG_ECC_STATUS);
+
+		if (ecc) {
+			unsigned int addr1, addr8;
+
+			addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1);
+			addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8);
+			if (ecc & ONENAND_ECC_2BIT_ALL) {
+				printk(KERN_ERR "onenand_wait: ECC error = "
+				       "0x%04x, addr1 %#x, addr8 %#x\n",
+				       ecc, addr1, addr8);
+				mtd->ecc_stats.failed++;
+				return -EBADMSG;
+			} else if (ecc & ONENAND_ECC_1BIT_ALL) {
+				printk(KERN_NOTICE "onenand_wait: correctable "
+				       "ECC error = 0x%04x, addr1 %#x, "
+				       "addr8 %#x\n", ecc, addr1, addr8);
+				mtd->ecc_stats.corrected++;
+			}
+		}
+	} else if (state == FL_READING) {
+		wait_err("timeout", state, ctrl, intr);
+		return -EIO;
+	}
+
+	if (ctrl & ONENAND_CTRL_ERROR) {
+		wait_err("controller error", state, ctrl, intr);
+		if (ctrl & ONENAND_CTRL_LOCK)
+			printk(KERN_ERR "onenand_wait: "
+					"Device is write protected!!!\n");
+		return -EIO;
+	}
+
+	if (ctrl & 0xFE9F)
+		wait_warn("unexpected controller status", state, ctrl, intr);
+
+	return 0;
+}
+
+static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area)
+{
+	struct onenand_chip *this = mtd->priv;
+
+	if (ONENAND_CURRENT_BUFFERRAM(this)) {
+		if (area == ONENAND_DATARAM)
+			return mtd->writesize;
+		if (area == ONENAND_SPARERAM)
+			return mtd->oobsize;
+	}
+
+	return 0;
+}
+
+#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2)
+
+static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+					unsigned char *buffer, int offset,
+					size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+	unsigned long timeout;
+	void *buf = (void *)buffer;
+	size_t xtra;
+	volatile unsigned *done;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+		goto out_copy;
+
+	if (buf >= high_memory) {
+		struct page *p1;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + count - 1) & PAGE_MASK))
+			goto out_copy;
+		p1 = vmalloc_to_page(buf);
+		if (!p1)
+			goto out_copy;
+		buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	xtra = count & 3;
+	if (xtra) {
+		count -= xtra;
+		memcpy(buf + count, this->base + bram_offset + count, xtra);
+	}
+
+	dma_src = c->phys_base + bram_offset;
+	dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		goto out_copy;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count >> 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	done = &c->dma_done.done;
+	while (time_before(jiffies, timeout))
+		if (*done)
+			break;
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+	if (!*done) {
+		dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+		goto out_copy;
+	}
+
+	return 0;
+
+out_copy:
+	memcpy(buf, this->base + bram_offset, count);
+	return 0;
+}
+
+/* Fill BufferRAM by DMA when possible, else by memcpy(); always returns 0. */
+static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+					 const unsigned char *buffer,
+					 int offset, size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+	unsigned long timeout;
+	void *buf = (void *)buffer;
+	volatile unsigned *done;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	/* DMA moves count >> 2 words, so a ragged tail must use memcpy() */
+	if (bram_offset & 3 || (size_t)buf & 3 || count < 384 || count & 3)
+		goto out_copy;
+
+	/* panic_write() may be in an interrupt context */
+	if (in_interrupt())
+		goto out_copy;
+
+	if (buf >= high_memory) {
+		struct page *p1;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + count - 1) & PAGE_MASK))
+			goto out_copy;
+		p1 = vmalloc_to_page(buf);
+		if (!p1)
+			goto out_copy;
+		buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	/* Only the source is mapped; dma_dst is just phys_base + offset */
+	dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+	dma_dst = c->phys_base + bram_offset;
+	if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %zu byte buffer\n", count);
+		goto out_copy;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count >> 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	done = &c->dma_done.done;
+	while (time_before(jiffies, timeout))
+		if (*done)
+			break;
+
+	dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+	if (!*done) {
+		dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+		goto out_copy;
+	}
+	return 0;
+
+out_copy:
+	memcpy(this->base + bram_offset, buf, count);
+	return 0;
+}
+
+#else
+
+int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+				 unsigned char *buffer, int offset,
+				 size_t count);
+
+int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+				  const unsigned char *buffer,
+				  int offset, size_t count);
+
+#endif
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2)
+
+static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+					unsigned char *buffer, int offset,
+					size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	/* DMA is not used.  Revisit PM requirements before enabling it. */
+	if (1 || (c->dma_channel < 0) ||
+	    ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+	    (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+		memcpy(buffer, (__force void *)(this->base + bram_offset),
+		       count);
+		return 0;
+	}
+
+	dma_src = c->phys_base + bram_offset;
+	dma_dst = dma_map_single(&c->pdev->dev, buffer, count,
+				 DMA_FROM_DEVICE);
+	if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n",
+			count);
+		return -1;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+				     count / 4, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+	wait_for_completion(&c->dma_done);
+
+	dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+	return 0;
+}
+
+/* OMAP2 BufferRAM write; the DMA path is currently disabled by "1 ||". */
+static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+					 const unsigned char *buffer,
+					 int offset, size_t count)
+{
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
+	dma_addr_t dma_src, dma_dst;
+	int bram_offset;
+
+	bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+	/* DMA is not used.  Revisit PM requirements before enabling it. */
+	if (1 || (c->dma_channel < 0) ||
+	    ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+	    (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+		memcpy((__force void *)(this->base + bram_offset), buffer,
+		       count);
+		return 0;
+	}
+
+	/* Only the source buffer is mapped, so that is the handle to check */
+	dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count,
+				 DMA_TO_DEVICE);
+	dma_dst = c->phys_base + bram_offset;
+	if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+		dev_err(&c->pdev->dev,
+			"Couldn't DMA map a %zu byte buffer\n", count);
+		return -1;
+	}
+
+	omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16,
+				     count / 2, 1, 0, 0, 0);
+	omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_src, 0, 0);
+	omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+				 dma_dst, 0, 0);
+
+	INIT_COMPLETION(c->dma_done);
+	omap_start_dma(c->dma_channel);
+	wait_for_completion(&c->dma_done);
+
+	dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+	return 0;
+}
+
+#else
+
+int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+				 unsigned char *buffer, int offset,
+				 size_t count);
+
+int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+				  const unsigned char *buffer,
+				  int offset, size_t count);
+
+#endif
+
+static struct platform_driver omap2_onenand_driver;
+
+/*
+ * Per-device callback for omap2_onenand_rephase(): re-run the setup hook.
+ */
+static int __adjust_timing(struct device *dev, void *data)
+{
+	struct omap2_onenand *c = dev_get_drvdata(dev);
+
+	BUG_ON(c->setup == NULL);
+
+	/*
+	 * DMA is not in use so this is all that is needed.
+	 * Revisit for OMAP3!
+	 */
+	return c->setup(c->onenand.base, c->freq);
+}
+
+int omap2_onenand_rephase(void)
+{
+	return driver_for_each_device(&omap2_onenand_driver.driver, NULL,
+				      NULL, __adjust_timing);
+}
+
+static void omap2_onenand_shutdown(struct platform_device *pdev)
+{
+	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+	/* With certain content in the buffer RAM, the OMAP boot ROM code can
+	 * recognize the flash chip incorrectly. Zero it out before soft reset.
+	 * Not __devexit: the .shutdown hook must stay resident without hotplug.
+	 */
+	memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
+}
+
+/* Claim GPMC CS, MMIO, optional GPIO IRQ and DMA, then register the MTD. */
+static int __devinit omap2_onenand_probe(struct platform_device *pdev)
+{
+	struct omap_onenand_platform_data *pdata;
+	struct omap2_onenand *c;
+	int r;
+
+	pdata = pdev->dev.platform_data;
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	init_completion(&c->irq_done);
+	init_completion(&c->dma_done);
+	c->gpmc_cs = pdata->cs;
+	c->gpio_irq = pdata->gpio_irq;
+	c->dma_channel = pdata->dma_channel;
+	if (c->dma_channel < 0)
+		c->gpio_irq = 0;	/* no DMA (-1): skip the IRQ as well */
+
+	r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base);
+	if (r < 0) {
+		dev_err(&pdev->dev, "Cannot request GPMC CS\n");
+		goto err_kfree;
+	}
+
+	if (request_mem_region(c->phys_base, ONENAND_IO_SIZE,
+			       pdev->dev.driver->name) == NULL) {
+		dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, "
+			"size: 0x%x\n",	c->phys_base, ONENAND_IO_SIZE);
+		r = -EBUSY;
+		goto err_free_cs;
+	}
+	c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE);
+	if (c->onenand.base == NULL) {
+		r = -ENOMEM;
+		goto err_release_mem_region;
+	}
+
+	if (pdata->onenand_setup != NULL) {
+		r = pdata->onenand_setup(c->onenand.base, c->freq);
+		if (r < 0) {
+			dev_err(&pdev->dev, "Onenand platform setup failed: "
+				"%d\n", r);
+			goto err_iounmap;
+		}
+		c->setup = pdata->onenand_setup;
+	}
+
+	if (c->gpio_irq) {
+		if ((r = omap_request_gpio(c->gpio_irq)) < 0) {
+			dev_err(&pdev->dev, "Failed to request GPIO%d for "
+				"OneNAND\n", c->gpio_irq);
+			goto err_iounmap;
+		}
+		omap_set_gpio_direction(c->gpio_irq, 1);
+		r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq),
+				omap2_onenand_interrupt, IRQF_TRIGGER_RISING,
+				pdev->dev.driver->name, c);
+		if (r < 0)
+			goto err_release_gpio;
+	}
+
+	if (c->dma_channel >= 0) {
+		r = omap_request_dma(0, pdev->dev.driver->name,
+				     omap2_onenand_dma_cb, (void *) c,
+				     &c->dma_channel);
+		if (r == 0) {
+			omap_set_dma_write_mode(c->dma_channel,
+						OMAP_DMA_WRITE_NON_POSTED);
+			omap_set_dma_src_data_pack(c->dma_channel, 1);
+			omap_set_dma_src_burst_mode(c->dma_channel,
+						    OMAP_DMA_DATA_BURST_8);
+			omap_set_dma_dest_data_pack(c->dma_channel, 1);
+			omap_set_dma_dest_burst_mode(c->dma_channel,
+						     OMAP_DMA_DATA_BURST_8);
+		} else {
+			dev_info(&pdev->dev,
+				 "failed to allocate DMA for OneNAND, "
+				 "using PIO instead\n");
+			c->dma_channel = -1;
+		}
+	}
+
+	dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual "
+		 "base %p\n", c->gpmc_cs, c->phys_base,
+		 c->onenand.base);
+
+	c->pdev = pdev;
+	c->mtd.name = pdev->dev.bus_id;
+	c->mtd.priv = &c->onenand;
+	c->mtd.owner = THIS_MODULE;
+
+	if (c->dma_channel >= 0) {
+		struct onenand_chip *this = &c->onenand;
+
+		this->wait = omap2_onenand_wait;
+		if (cpu_is_omap34xx()) {
+			this->read_bufferram = omap3_onenand_read_bufferram;
+			this->write_bufferram = omap3_onenand_write_bufferram;
+		} else {
+			this->read_bufferram = omap2_onenand_read_bufferram;
+			this->write_bufferram = omap2_onenand_write_bufferram;
+		}
+	}
+
+	if ((r = onenand_scan(&c->mtd, 1)) < 0)
+		goto err_release_dma;
+
+	/* Derive c->freq from bits 7:4 of the OneNAND version ID */
+	switch ((c->onenand.version_id >> 4) & 0xf) {
+	case 0:
+		c->freq = 40;
+		break;
+	case 1:
+		c->freq = 54;
+		break;
+	case 2:
+		c->freq = 66;
+		break;
+	case 3:
+		c->freq = 83;
+		break;
+	}
+
+#ifdef CONFIG_MTD_PARTITIONS
+	if (pdata->parts != NULL)
+		r = add_mtd_partitions(&c->mtd, pdata->parts,
+				       pdata->nr_parts);
+	else
+#endif
+		r = add_mtd_device(&c->mtd);
+	if (r < 0)
+		goto err_release_onenand;
+
+	platform_set_drvdata(pdev, c);
+
+	return 0;
+
+err_release_onenand:
+	onenand_release(&c->mtd);
+err_release_dma:
+	if (c->dma_channel != -1)
+		omap_free_dma(c->dma_channel);
+	if (c->gpio_irq)
+		free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+err_release_gpio:
+	if (c->gpio_irq)
+		omap_free_gpio(c->gpio_irq);
+err_iounmap:
+	iounmap(c->onenand.base);
+err_release_mem_region:
+	release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+err_free_cs:
+	gpmc_cs_free(c->gpmc_cs);
+err_kfree:
+	kfree(c);
+
+	return r;
+}
+
+static int __devexit omap2_onenand_remove(struct platform_device *pdev)
+{
+	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+	BUG_ON(c == NULL);
+
+#ifdef CONFIG_MTD_PARTITIONS
+	if (c->parts)
+		del_mtd_partitions(&c->mtd);
+	else
+#endif
+		del_mtd_device(&c->mtd);
+
+	onenand_release(&c->mtd);
+	if (c->dma_channel != -1)
+		omap_free_dma(c->dma_channel);
+	omap2_onenand_shutdown(pdev);
+	platform_set_drvdata(pdev, NULL);
+	if (c->gpio_irq) {
+		free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+		omap_free_gpio(c->gpio_irq);
+	}
+	iounmap(c->onenand.base);
+	release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+	/* Give back the chip select taken by gpmc_cs_request() in probe */
+	gpmc_cs_free(c->gpmc_cs);
+	kfree(c);
+
+	return 0;
+}
+
+static struct platform_driver omap2_onenand_driver = {
+	.probe		= omap2_onenand_probe,
+	.remove		= __devexit_p(omap2_onenand_remove),
+	.shutdown	= omap2_onenand_shutdown,
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.owner  = THIS_MODULE,
+	},
+};
+
+static int __init omap2_onenand_init(void)
+{
+	printk(KERN_INFO "OneNAND driver initializing\n");
+	return platform_driver_register(&omap2_onenand_driver);
+}
+
+static void __exit omap2_onenand_exit(void)
+{
+	platform_driver_unregister(&omap2_onenand_driver);
+}
+
+module_init(omap2_onenand_init);
+module_exit(omap2_onenand_exit);
+
+MODULE_ALIAS(DRIVER_NAME);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
+MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 926cf3a4135..90ed319f26e 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index a5f3d60047d..33a5d6ed6f1 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -321,8 +321,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	DEBUG(MTD_DEBUG_LEVEL1,
 		"SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
 		ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
-		(ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
-		MAX_PHYS_BLK_PER_ZONE);
+		DIV_ROUND_UP(ssfdc->map_len, MAX_PHYS_BLK_PER_ZONE));
 
 	/* Set geometry */
 	ssfdc->heads = 16;
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 03c759b4eeb..b30a0b83d7f 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -104,12 +104,9 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
 	struct ubi_volume_desc *desc;
 	int vol_id = iminor(inode) - 1, mode, ubi_num;
 
-	lock_kernel();
 	ubi_num = ubi_major2num(imajor(inode));
-	if (ubi_num < 0) {
-		unlock_kernel();
+	if (ubi_num < 0)
 		return ubi_num;
-	}
 
 	if (file->f_mode & FMODE_WRITE)
 		mode = UBI_READWRITE;
@@ -119,7 +116,6 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
 	dbg_gen("open volume %d, mode %d", vol_id, mode);
 
 	desc = ubi_open_volume(ubi_num, vol_id, mode);
-	unlock_kernel();
 	if (IS_ERR(desc))
 		return PTR_ERR(desc);
 
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 967bb4406df..4f2daa5bbec 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -387,7 +387,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 		pnum, vol_id, lnum, ec, sqnum, bitflips);
 
 	sv = add_volume(si, vol_id, pnum, vid_hdr);
-	if (IS_ERR(sv) < 0)
+	if (IS_ERR(sv))
 		return PTR_ERR(sv);
 
 	if (si->max_sqnum < sqnum)
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 217d0e111b2..333c8941552 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -244,8 +244,8 @@ static int vtbl_check(const struct ubi_device *ubi,
 		}
 
 		if (reserved_pebs > ubi->good_peb_count) {
-			dbg_err("too large reserved_pebs, good PEBs %d",
-				ubi->good_peb_count);
+			dbg_err("too large reserved_pebs %d, good PEBs %d",
+				reserved_pebs, ubi->good_peb_count);
 			err = 9;
 			goto bad;
 		}
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 491ee16da5c..9ba295d9dd9 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -90,7 +90,7 @@ static int vortex_debug = 1;
 #include <linux/eisa.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
-#include <asm/irq.h>			/* For NR_IRQS only. */
+#include <asm/irq.h>			/* For nr_irqs only. */
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -1221,7 +1221,7 @@ static int __devinit vortex_probe1(struct device *gendev,
 	if (print_info)
 		printk(", IRQ %d\n", dev->irq);
 	/* Tell them about an invalid IRQ. */
-	if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+	if (dev->irq <= 0 || dev->irq >= nr_irqs)
 		printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
 			   dev->irq);
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ad301ace608..a5a9c6da513 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2504,6 +2504,15 @@ config PASEMI_MAC
 	  This driver supports the on-chip 1/10Gbit Ethernet controller on
 	  PA Semi's PWRficient line of chips.
 
+config MLX4_EN
+	tristate "Mellanox Technologies 10Gbit Ethernet support"
+	depends on PCI && INET
+	select MLX4_CORE
+	select INET_LRO
+	help
+	  This driver supports Mellanox Technologies ConnectX Ethernet
+	  devices.
+
 config MLX4_CORE
 	tristate
 	depends on PCI
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 17ac6975d70..b6a816e60c0 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -416,10 +416,10 @@ static int ser12_open(struct net_device *dev)
 	if (!dev || !bc)
 		return -ENXIO;
 	if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
-	    dev->irq < 2 || dev->irq > NR_IRQS) {
+	    dev->irq < 2 || dev->irq > nr_irqs) {
 		printk(KERN_INFO "baycom_ser_fdx: invalid portnumber (max %u) "
 				"or irq (2 <= irq <= %d)\n",
-				0xffff-SER12_EXTENT, NR_IRQS);
+				0xffff-SER12_EXTENT, nr_irqs);
 		return -ENXIO;
 	}
 	if (bc->baud < 300 || bc->baud > 4800) {
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 45ae9d1191d..c17e39bc546 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1465,7 +1465,7 @@ static void z8530_init(void)
 	printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
 	
 	flag=" ";
-	for (k = 0; k < NR_IRQS; k++)
+	for (k = 0; k < nr_irqs; k++)
 		if (Ivec[k].used) 
 		{
 			printk("%s%d", flag, k);
@@ -1728,7 +1728,7 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 			if (hwcfg.irq == 2) hwcfg.irq = 9;
 
-			if (hwcfg.irq < 0 || hwcfg.irq >= NR_IRQS)
+			if (hwcfg.irq < 0 || hwcfg.irq >= nr_irqs)
 				return -EINVAL;
 				
 			if (!Ivec[hwcfg.irq].used && hwcfg.irq)
@@ -2148,7 +2148,7 @@ static void __exit scc_cleanup_driver(void)
 		}
 		
 	/* To unload the port must be closed so no real IRQ pending */
-	for (k=0; k < NR_IRQS ; k++)
+	for (k = 0; k < nr_irqs ; k++)
 		if (Ivec[k].used) free_irq(k, NULL);
 		
 	local_irq_enable();
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 0952a6528f5..a7a97bf998f 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -1,4 +1,9 @@
 obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o
 
 mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
-		mr.o pd.o profile.o qp.o reset.o srq.o
+		mr.o pd.o port.o profile.o qp.o reset.o srq.o
+
+obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
+
+mlx4_en-y := 	en_main.o en_tx.o en_rx.o en_params.o en_port.o en_cq.o \
+		en_resources.o en_netdev.o
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index b411b79d72a..ad95d5f7b63 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -48,13 +48,16 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
 
 	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
 	if (obj >= bitmap->max) {
-		bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+		bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+				& bitmap->mask;
 		obj = find_first_zero_bit(bitmap->table, bitmap->max);
 	}
 
 	if (obj < bitmap->max) {
 		set_bit(obj, bitmap->table);
-		bitmap->last = (obj + 1) & (bitmap->max - 1);
+		bitmap->last = (obj + 1);
+		if (bitmap->last == bitmap->max)
+			bitmap->last = 0;
 		obj |= bitmap->top;
 	} else
 		obj = -1;
@@ -66,16 +69,90 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
 
 void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
 {
-	obj &= bitmap->max - 1;
+	mlx4_bitmap_free_range(bitmap, obj, 1);
+}
+
+static unsigned long find_aligned_range(unsigned long *bitmap,
+					u32 start, u32 nbits,
+					int len, int align)
+{
+	unsigned long end, i;
+
+again:
+	start = ALIGN(start, align);
+
+	while ((start < nbits) && test_bit(start, bitmap))
+		start += align;
+
+	if (start >= nbits)
+		return -1;
+
+	end = start+len;
+	if (end > nbits)
+		return -1;
+
+	for (i = start + 1; i < end; i++) {
+		if (test_bit(i, bitmap)) {
+			start = i + 1;
+			goto again;
+		}
+	}
+
+	return start;
+}
+
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
+{
+	u32 obj, i;
+
+	if (likely(cnt == 1 && align == 1))
+		return mlx4_bitmap_alloc(bitmap);
+
+	spin_lock(&bitmap->lock);
+
+	obj = find_aligned_range(bitmap->table, bitmap->last,
+				 bitmap->max, cnt, align);
+	if (obj >= bitmap->max) {
+		bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+				& bitmap->mask;
+		obj = find_aligned_range(bitmap->table, 0, bitmap->max,
+					 cnt, align);
+	}
+
+	if (obj < bitmap->max) {
+		for (i = 0; i < cnt; i++)
+			set_bit(obj + i, bitmap->table);
+		if (obj == bitmap->last) {
+			bitmap->last = (obj + cnt);
+			if (bitmap->last >= bitmap->max)
+				bitmap->last = 0;
+		}
+		obj |= bitmap->top;
+	} else
+		obj = -1;
+
+	spin_unlock(&bitmap->lock);
+
+	return obj;
+}
+
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
+{
+	u32 i;
+
+	obj &= bitmap->max + bitmap->reserved_top - 1;
 
 	spin_lock(&bitmap->lock);
-	clear_bit(obj, bitmap->table);
+	for (i = 0; i < cnt; i++)
+		clear_bit(obj + i, bitmap->table);
 	bitmap->last = min(bitmap->last, obj);
-	bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+	bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+			& bitmap->mask;
 	spin_unlock(&bitmap->lock);
 }
 
-int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved)
+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
+		     u32 reserved_bot, u32 reserved_top)
 {
 	int i;
 
@@ -85,14 +162,16 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved
 
 	bitmap->last = 0;
 	bitmap->top  = 0;
-	bitmap->max  = num;
+	bitmap->max  = num - reserved_top;
 	bitmap->mask = mask;
+	bitmap->reserved_top = reserved_top;
 	spin_lock_init(&bitmap->lock);
-	bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL);
+	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+				sizeof (long), GFP_KERNEL);
 	if (!bitmap->table)
 		return -ENOMEM;
 
-	for (i = 0; i < reserved; ++i)
+	for (i = 0; i < reserved_bot; ++i)
 		set_bit(i, bitmap->table);
 
 	return 0;
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index 9bb50e3f897..b7ad2829d67 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -300,7 +300,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
 	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
 	err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
-			       dev->caps.num_cqs - 1, dev->caps.reserved_cqs);
+			       dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
new file mode 100644
index 00000000000..1368a8010af
--- /dev/null
+++ b/drivers/net/mlx4/en_cq.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
+{
+	return;
+}
+
+
+int mlx4_en_create_cq(struct mlx4_en_priv *priv,
+		      struct mlx4_en_cq *cq,
+		      int entries, int ring, enum cq_type mode)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	cq->size = entries;
+	if (mode == RX)
+		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	else
+		cq->buf_size = sizeof(struct mlx4_cqe);
+
+	cq->ring = ring;
+	cq->is_tx = mode;
+	spin_lock_init(&cq->lock);
+
+	err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
+				cq->buf_size, 2 * PAGE_SIZE);
+	if (err)
+		return err;
+
+	err = mlx4_en_map_buffer(&cq->wqres.buf);
+	if (err)
+		mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+
+	return err;
+}
+
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	cq->dev = mdev->pndev[priv->port];
+	cq->mcq.set_ci_db  = cq->wqres.db.db;
+	cq->mcq.arm_db     = cq->wqres.db.db + 1;
+	*cq->mcq.set_ci_db = 0;
+	*cq->mcq.arm_db    = 0;
+	cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf;
+	memset(cq->buf, 0, cq->buf_size);
+
+	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
+			    cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+	if (err)
+		return err;
+
+	cq->mcq.comp  = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
+	cq->mcq.event = mlx4_en_cq_event;
+
+	if (cq->is_tx) {
+		init_timer(&cq->timer);
+		cq->timer.function = mlx4_en_poll_tx_cq;
+		cq->timer.data = (unsigned long) cq;
+	} else {
+		netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
+		napi_enable(&cq->napi);
+	}
+
+	return 0;
+}
+
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	mlx4_en_unmap_buffer(&cq->wqres.buf);
+	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+	cq->buf_size = 0;
+	cq->buf = NULL;
+}
+
+void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	if (cq->is_tx)
+		del_timer(&cq->timer);
+	else
+		napi_disable(&cq->napi);
+
+	mlx4_cq_free(mdev->dev, &cq->mcq);
+}
+
+/* Set rx cq moderation parameters */
+int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+	return mlx4_cq_modify(priv->mdev->dev, &cq->mcq,
+			      cq->moder_cnt, cq->moder_time);
+}
+
+int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+	cq->armed = 1;
+	mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map,
+		    &priv->mdev->uar_lock);
+
+	return 0;
+}
+
+
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
new file mode 100644
index 00000000000..1b0eebf84f7
--- /dev/null
+++ b/drivers/net/mlx4/en_main.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/cpumask.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION " ("DRV_RELDATE")");
+
+static const char mlx4_en_version[] =
+	DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v"
+	DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
+			  enum mlx4_dev_event event, int port)
+{
+	struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
+	struct mlx4_en_priv *priv;
+
+	if (!mdev->pndev[port])
+		return;
+
+	priv = netdev_priv(mdev->pndev[port]);
+	switch (event) {
+	case MLX4_DEV_EVENT_PORT_UP:
+	case MLX4_DEV_EVENT_PORT_DOWN:
+		/* To prevent races, we poll the link state in a separate
+		  task rather than changing it here */
+		priv->link_state = event;
+		queue_work(mdev->workqueue, &priv->linkstate_task);
+		break;
+
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+		mlx4_err(mdev, "Internal error detected, restarting device\n");
+		break;
+
+	default:
+		mlx4_warn(mdev, "Unhandled event: %d\n", event);
+	}
+}
+
+static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
+{
+	struct mlx4_en_dev *mdev = endev_ptr;
+	int i;
+
+	mutex_lock(&mdev->state_lock);
+	mdev->device_up = false;
+	mutex_unlock(&mdev->state_lock);
+
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+		if (mdev->pndev[i])
+			mlx4_en_destroy_netdev(mdev->pndev[i]);
+	flush_workqueue(mdev->workqueue);
+	destroy_workqueue(mdev->workqueue);
+	mlx4_mr_free(dev, &mdev->mr);
+	iounmap(mdev->uar_map);		/* mapped in mlx4_en_add() */
+	mlx4_uar_free(dev, &mdev->priv_uar);
+	mlx4_pd_free(dev, mdev->priv_pdn);
+	kfree(mdev);
+}
+
+/* Set up device state and a netdev per Ethernet port; returns NULL on error */
+static void *mlx4_en_add(struct mlx4_dev *dev)
+{
+	static int mlx4_en_version_printed;
+	struct mlx4_en_dev *mdev;
+	int i;
+	int err;
+
+	if (!mlx4_en_version_printed) {
+		printk(KERN_INFO "%s", mlx4_en_version);
+		mlx4_en_version_printed++;
+	}
+
+	mdev = kzalloc(sizeof *mdev, GFP_KERNEL);
+	if (!mdev) {
+		dev_err(&dev->pdev->dev, "Device struct alloc failed, "
+			"aborting.\n");
+		err = -ENOMEM;
+		goto err_free_res;
+	}
+
+	if (mlx4_pd_alloc(dev, &mdev->priv_pdn))
+		goto err_free_dev;
+
+	if (mlx4_uar_alloc(dev, &mdev->priv_uar))
+		goto err_pd;
+
+	mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!mdev->uar_map)
+		goto err_uar;
+	spin_lock_init(&mdev->uar_lock);
+
+	mdev->dev = dev;
+	mdev->dma_device = &(dev->pdev->dev);
+	mdev->pdev = dev->pdev;
+	mdev->device_up = false;
+
+	mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
+	if (!mdev->LSO_support)
+		mlx4_warn(mdev, "LSO not supported, please upgrade to later "
+				"FW version to enable LSO\n");
+
+	if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
+			 MLX4_PERM_LOCAL_WRITE |  MLX4_PERM_LOCAL_READ,
+			 0, 0, &mdev->mr)) {
+		mlx4_err(mdev, "Failed allocating memory region\n");
+		goto err_map;
+	}
+	if (mlx4_mr_enable(mdev->dev, &mdev->mr)) {
+		mlx4_err(mdev, "Failed enabling memory region\n");
+		goto err_mr;
+	}
+
+	/* Build device profile according to supplied module parameters */
+	err = mlx4_en_get_profile(mdev);
+	if (err) {
+		mlx4_err(mdev, "Bad module parameters, aborting.\n");
+		goto err_mr;
+	}
+
+	/* Configure which ports to start according to module parameters */
+	mdev->port_cnt = 0;
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+		mdev->port_cnt++;
+
+	/* If we did not receive an explicit number of Rx rings, default to
+	 * a single Rx ring per port */
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+		mlx4_info(mdev, "Using %d tx rings for port:%d\n",
+			  mdev->profile.prof[i].tx_ring_num, i);
+		if (!mdev->profile.prof[i].rx_ring_num) {
+			mdev->profile.prof[i].rx_ring_num = 1;
+			mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
+				  1, i);
+		} else
+			mlx4_info(mdev, "Using %d rx rings for port:%d\n",
+				  mdev->profile.prof[i].rx_ring_num, i);
+	}
+
+	/* Create our own workqueue for reset/multicast tasks
+	 * Note: we cannot use the shared workqueue because of deadlocks caused
+	 *       by the rtnl lock */
+	mdev->workqueue = create_singlethread_workqueue("mlx4_en");
+	if (!mdev->workqueue) {
+		err = -ENOMEM;
+		goto err_close_nic;
+	}
+
+	/* At this stage all non-port specific tasks are complete:
+	 * mark the card state as up */
+	mutex_init(&mdev->state_lock);
+	mdev->device_up = true;
+
+	/* Create a netdev for each port */
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+		mlx4_info(mdev, "Activating port:%d\n", i);
+		if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) {
+			mdev->pndev[i] = NULL;
+			goto err_free_netdev;
+		}
+	}
+	return mdev;
+
+err_free_netdev:
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+		if (mdev->pndev[i])
+			mlx4_en_destroy_netdev(mdev->pndev[i]);
+	}
+
+	mutex_lock(&mdev->state_lock);
+	mdev->device_up = false;
+	mutex_unlock(&mdev->state_lock);
+	flush_workqueue(mdev->workqueue);
+
+	/* Stop event queue before we drop down to release shared SW state */
+
+err_close_nic:
+	destroy_workqueue(mdev->workqueue);
+err_mr:
+	mlx4_mr_free(dev, &mdev->mr);
+err_map:
+	iounmap(mdev->uar_map);
+err_uar:
+	mlx4_uar_free(dev, &mdev->priv_uar);
+err_pd:
+	mlx4_pd_free(dev, mdev->priv_pdn);
+err_free_dev:
+	kfree(mdev);
+err_free_res:
+	return NULL;
+}
+
+static struct mlx4_interface mlx4_en_interface = {
+	.add	= mlx4_en_add,
+	.remove	= mlx4_en_remove,
+	.event	= mlx4_en_event,
+};
+
+static int __init mlx4_en_init(void)
+{
+	return mlx4_register_interface(&mlx4_en_interface);
+}
+
+static void __exit mlx4_en_cleanup(void)
+{
+	mlx4_unregister_interface(&mlx4_en_interface);
+}
+
+module_init(mlx4_en_init);
+module_exit(mlx4_en_cleanup);
+
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
new file mode 100644
index 00000000000..a339afbeed3
--- /dev/null
+++ b/drivers/net/mlx4/en_netdev.c
@@ -0,0 +1,1088 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <linux/delay.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
+
+#include "mlx4_en.h"
+#include "en_port.h"
+
+
+static void mlx4_en_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{	/* Record the VLAN group and, if card and port are up, configure the filter */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	mlx4_dbg(HW, priv, "Registering VLAN group:%p\n", grp);
+	priv->vlgrp = grp;	/* stored even when the port is down */
+
+	mutex_lock(&mdev->state_lock);
+	if (mdev->device_up && priv->port_up) {
+		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, grp);
+		if (err)	/* failure is only logged; the hook returns void */
+			mlx4_err(mdev, "Failed configuring VLAN filter\n");
+	}
+	mutex_unlock(&mdev->state_lock);
+}
+
+static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+{	/* Re-send the whole VLAN group to the port filter after a VID was added */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	if (!priv->vlgrp)	/* no group registered yet: nothing to configure */
+		return;
+
+	mlx4_dbg(HW, priv, "adding VLAN:%d (vlgrp entry:%p)\n",
+		 vid, vlan_group_get_device(priv->vlgrp, vid));
+
+	/* Add VID to port VLAN filter (skipped while card or port is down) */
+	mutex_lock(&mdev->state_lock);
+	if (mdev->device_up && priv->port_up) {
+		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp);
+		if (err)	/* failure is only logged */
+			mlx4_err(mdev, "Failed configuring VLAN filter\n");
+	}
+	mutex_unlock(&mdev->state_lock);
+}
+
+static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{	/* Clear the group's entry for vid and re-send the group to the port filter */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	if (!priv->vlgrp)	/* no group registered: nothing to remove */
+		return;
+
+	mlx4_dbg(HW, priv, "Killing VID:%d (vlgrp:%p vlgrp "
+		 "entry:%p)\n", vid, priv->vlgrp,
+		 vlan_group_get_device(priv->vlgrp, vid));
+	vlan_group_set_device(priv->vlgrp, vid, NULL);	/* done even if the port is down */
+
+	/* Remove VID from port VLAN filter (skipped while card or port is down) */
+	mutex_lock(&mdev->state_lock);
+	if (mdev->device_up && priv->port_up) {
+		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp);
+		if (err)	/* failure is only logged */
+			mlx4_err(mdev, "Failed configuring VLAN filter\n");
+	}
+	mutex_unlock(&mdev->state_lock);
+}
+
+static u64 mlx4_en_mac_to_u64(u8 *addr)
+{
+	/* Pack the ETH_ALEN address bytes into one integer, first byte
+	 * most significant: every step shifts left and appends a byte */
+	u64 packed = 0;
+	int idx = 0;
+
+	while (idx < ETH_ALEN)
+		packed = (packed << 8) | addr[idx++];
+	return packed;
+}
+
+static int mlx4_en_set_mac(struct net_device *dev, void *addr)
+{	/* Validate and store a new MAC; the HW update is deferred to mac_task */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct sockaddr *saddr = addr;	/* caller passes a struct sockaddr */
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
+	priv->mac = mlx4_en_mac_to_u64(dev->dev_addr);	/* integer form used for registration */
+	queue_work(mdev->workqueue, &priv->mac_task);	/* runs mlx4_en_do_set_mac() */
+	return 0;	/* success is reported before the HW has been updated */
+}
+
+static void mlx4_en_do_set_mac(struct work_struct *work)
+{	/* mac_task handler: swap the registered MAC for priv->mac if the port is up */
+	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+						 mac_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err = 0;
+
+	mutex_lock(&mdev->state_lock);
+	if (priv->port_up) {
+		/* Remove old MAC and insert the new one */
+		mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+		err = mlx4_register_mac(mdev->dev, priv->port,
+					priv->mac, &priv->mac_index);
+		if (err)	/* only logged; the old MAC is already unregistered */
+			mlx4_err(mdev, "Failed changing HW MAC address\n");
+	} else
+		mlx4_dbg(HW, priv, "Port is down, exiting...\n");
+
+	mutex_unlock(&mdev->state_lock);
+}
+
+static void mlx4_en_clear_list(struct net_device *dev)
+{	/* Free every node of the cached multicast list and reset its head */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct dev_mc_list *entry;
+	struct dev_mc_list *following;
+
+	for (entry = priv->mc_list; entry; entry = following) {
+		following = entry->next;	/* read the link before freeing */
+		kfree(entry);
+	}
+
+	priv->mc_list = NULL;
+}
+
+static void mlx4_en_cache_mclist(struct net_device *dev)
+{	/* Copy dev->mc_list node by node into the private list priv->mc_list */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct dev_mc_list *mclist;
+	struct dev_mc_list *tmp;
+	struct dev_mc_list *plist = NULL;	/* tail of the copy built so far */
+
+	for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
+		tmp = kmalloc(sizeof(struct dev_mc_list), GFP_ATOMIC);
+		if (!tmp) {	/* on failure drop the partial copy: cache ends up empty */
+			mlx4_err(mdev, "failed to allocate multicast list\n");
+			mlx4_en_clear_list(dev);
+			return;
+		}
+		memcpy(tmp, mclist, sizeof(struct dev_mc_list));
+		tmp->next = NULL;	/* the copied link pointed into the source list */
+		if (plist)
+			plist->next = tmp;
+		else
+			priv->mc_list = tmp;	/* first node becomes the head */
+		plist = tmp;
+	}
+}
+
+
+static void mlx4_en_set_multicast(struct net_device *dev)
+{	/* set_multicast_list hook: defer the filter update to mcast_task */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (!priv->port_up)	/* checked without state_lock; the task re-checks under it */
+		return;
+
+	queue_work(priv->mdev->workqueue, &priv->mcast_task);
+}
+
+static void mlx4_en_do_set_multicast(struct work_struct *work)
+{	/* mcast_task handler: apply promiscuous/allmulti/multicast state to the port */
+	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+						 mcast_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->dev;
+	struct dev_mc_list *mclist;
+	u64 mcast_addr = 0;
+	int err;
+
+	mutex_lock(&mdev->state_lock);
+	if (!mdev->device_up) {
+		mlx4_dbg(HW, priv, "Card is not up, ignoring "
+				   "multicast change.\n");
+		goto out;
+	}
+	if (!priv->port_up) {
+		mlx4_dbg(HW, priv, "Port is down, ignoring "
+				   "multicast change.\n");
+		goto out;
+	}
+
+	/*
+	 * Promiscuous mode: disable all filters
+	 */
+
+	if (dev->flags & IFF_PROMISC) {
+		if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {	/* act only on the transition */
+			if (netif_msg_rx_status(priv))
+				mlx4_warn(mdev, "Port:%d entering promiscuous mode\n",
+					  priv->port);
+			priv->flags |= MLX4_EN_FLAG_PROMISC;
+
+			/* Enable promiscuous mode */
+			err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
+						     priv->base_qpn, 1);
+			if (err)
+				mlx4_err(mdev, "Failed enabling "
+					 "promiscous mode\n");
+
+			/* Disable port multicast filter (unconditionally) */
+			err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+						  0, MLX4_MCAST_DISABLE);
+			if (err)
+				mlx4_err(mdev, "Failed disabling "
+					 "multicast filter\n");
+
+			/* Disable port VLAN filter */
+			err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL);
+			if (err)
+				mlx4_err(mdev, "Failed disabling "
+					 "VLAN filter\n");
+		}
+		goto out;	/* multicast list is not programmed while promiscuous */
+	}
+
+	/*
+	 * Not in promiscuous mode
+	 */
+
+	if (priv->flags & MLX4_EN_FLAG_PROMISC) {	/* leaving: undo the steps above */
+		if (netif_msg_rx_status(priv))
+			mlx4_warn(mdev, "Port:%d leaving promiscuous mode\n",
+				  priv->port);
+		priv->flags &= ~MLX4_EN_FLAG_PROMISC;
+
+		/* Disable promiscuous mode */
+		err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
+					     priv->base_qpn, 0);
+		if (err)
+			mlx4_err(mdev, "Failed disabling promiscous mode\n");
+
+		/* Enable port VLAN filter */
+		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp);
+		if (err)
+			mlx4_err(mdev, "Failed enabling VLAN filter\n");
+	}
+
+	/* Enable/disable the multicast filter according to IFF_ALLMULTI */
+	if (dev->flags & IFF_ALLMULTI) {
+		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+					  0, MLX4_MCAST_DISABLE);
+		if (err)
+			mlx4_err(mdev, "Failed disabling multicast filter\n");
+	} else {
+		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+					  0, MLX4_MCAST_DISABLE);	/* filter off while it is rebuilt */
+		if (err)
+			mlx4_err(mdev, "Failed disabling multicast filter\n");
+
+		/* Flush mcast filter and init it with broadcast address */
+		mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST,
+				    1, MLX4_MCAST_CONFIG);
+
+		/* Update multicast list - we cache all addresses so they won't
+		 * change while HW is updated holding the command semaphore */
+		netif_tx_lock_bh(dev);
+		mlx4_en_cache_mclist(dev);
+		netif_tx_unlock_bh(dev);
+		for (mclist = priv->mc_list; mclist; mclist = mclist->next) {
+			mcast_addr = mlx4_en_mac_to_u64(mclist->dmi_addr);
+			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
+					    mcast_addr, 0, MLX4_MCAST_CONFIG);	/* result not checked */
+		}
+		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+					  0, MLX4_MCAST_ENABLE);
+		if (err)
+			mlx4_err(mdev, "Failed enabling multicast filter\n");
+
+		mlx4_en_clear_list(dev);	/* the cached copy is no longer needed */
+	}
+out:
+	mutex_unlock(&mdev->state_lock);
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void mlx4_en_netpoll(struct net_device *dev)
+{	/* poll_controller hook: process every Rx CQ directly under its lock */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_cq *cq;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		cq = &priv->rx_cq[i];
+		spin_lock_irqsave(&cq->lock, flags);
+		napi_synchronize(&cq->napi);	/* NOTE(review): called with cq->lock held, IRQs off - confirm it cannot sleep here */
+		mlx4_en_process_rx_cq(dev, cq, 0);
+		spin_unlock_irqrestore(&cq->lock, flags);
+	}
+}
+#endif
+
+static void mlx4_en_tx_timeout(struct net_device *dev)
+{	/* tx_timeout hook: count the event and queue watchdog_task to restart the port */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	if (netif_msg_timer(priv))
+		mlx4_warn(mdev, "Tx timeout called on port:%d\n", priv->port);
+
+	if (netif_carrier_ok(dev)) {	/* nothing is scheduled while the carrier is off */
+		priv->port_stats.tx_timeout++;
+		mlx4_dbg(DRV, priv, "Scheduling watchdog\n");
+		queue_work(mdev->workqueue, &priv->watchdog_task);	/* runs mlx4_en_restart() */
+	}
+}
+
+
+static struct net_device_stats *mlx4_en_get_stats(struct net_device *dev)
+{	/* get_stats hook: return a snapshot of priv->stats taken under stats_lock */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	spin_lock_bh(&priv->stats_lock);
+	memcpy(&priv->ret_stats, &priv->stats, sizeof(priv->stats));
+	spin_unlock_bh(&priv->stats_lock);
+
+	return &priv->ret_stats;	/* shared buffer, overwritten by the next call */
+}
+
+static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
+{	/* Load default coalescing values into priv and every Rx/Tx CQ */
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_cq *cq;
+	int i;
+
+	/* If we haven't received a specific coalescing setting
+	 * (module param), we set the moderation parameters as follows:
+	 * - moder_cnt is set to the number of mtu sized packets to
+	 *   satisfy our coalescing target.
+	 * - moder_time is set to a fixed value.
+	 */
+	priv->rx_frames = (mdev->profile.rx_moder_cnt ==
+			   MLX4_EN_AUTO_CONF) ?
+				MLX4_EN_RX_COAL_TARGET /
+				priv->dev->mtu + 1 :
+				mdev->profile.rx_moder_cnt;
+	priv->rx_usecs = (mdev->profile.rx_moder_time ==
+			  MLX4_EN_AUTO_CONF) ?
+				MLX4_EN_RX_COAL_TIME :
+				mdev->profile.rx_moder_time;
+	mlx4_dbg(INTR, priv, "Default coalesing params for mtu:%d - "
+			     "rx_frames:%d rx_usecs:%d\n",
+		 priv->dev->mtu, priv->rx_frames, priv->rx_usecs);
+
+	/* Setup cq moderation params (stored only; not written to HW here) */
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		cq = &priv->rx_cq[i];
+		cq->moder_cnt = priv->rx_frames;
+		cq->moder_time = priv->rx_usecs;
+	}
+
+	for (i = 0; i < priv->tx_ring_num; i++) {	/* Tx CQs always use the fixed constants */
+		cq = &priv->tx_cq[i];
+		cq->moder_cnt = MLX4_EN_TX_COAL_PKTS;
+		cq->moder_time = MLX4_EN_TX_COAL_TIME;
+	}
+
+	/* Reset auto-moderation params */
+	priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW;
+	priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW;
+	priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH;
+	priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH;
+	priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL;
+	priv->adaptive_rx_coal = mdev->profile.auto_moder;
+	priv->last_moder_time = MLX4_EN_AUTO_CONF;
+	priv->last_moder_jiffies = 0;	/* 0 makes the next auto-moderation pass only record a baseline */
+	priv->last_moder_packets = 0;
+	priv->last_moder_tx_packets = 0;
+	priv->last_moder_bytes = 0;
+}
+
+static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
+{	/* Re-tune the Rx CQ moderation time from the packet rate since the last sample */
+	unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_cq *cq;
+	unsigned long packets;
+	unsigned long rate;
+	unsigned long avg_pkt_size;
+	unsigned long rx_packets;
+	unsigned long rx_bytes;
+	unsigned long tx_packets;
+	unsigned long tx_pkt_diff;
+	unsigned long rx_pkt_diff;
+	int moder_time;
+	int i, err;
+
+	if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ)
+		return;	/* adaptive mode off, or the sample interval has not elapsed */
+
+	spin_lock_bh(&priv->stats_lock);
+	rx_packets = priv->stats.rx_packets;
+	rx_bytes = priv->stats.rx_bytes;
+	tx_packets = priv->stats.tx_packets;
+	spin_unlock_bh(&priv->stats_lock);
+
+	if (!priv->last_moder_jiffies || !period)
+		goto out;	/* no previous sample: just record the baseline */
+
+	tx_pkt_diff = ((unsigned long) (tx_packets -
+					priv->last_moder_tx_packets));
+	rx_pkt_diff = ((unsigned long) (rx_packets -
+					priv->last_moder_packets));
+	packets = max(tx_pkt_diff, rx_pkt_diff);
+	rate = packets * HZ / period;	/* packets per second */
+	avg_pkt_size = packets ? ((unsigned long) (rx_bytes -
+				 priv->last_moder_bytes)) / packets : 0;	/* used for the debug print only */
+
+	/* Apply auto-moderation only when packet rate exceeds a rate that
+	 * it matters */
+	if (rate > MLX4_EN_RX_RATE_THRESH) {
+		/* If tx and rx packet rates are not balanced, assume that
+		 * traffic is mainly BW bound and apply maximum moderation.
+		 * Otherwise, moderate according to packet rate */
+		if (2 * tx_pkt_diff > 3 * rx_pkt_diff ||
+		    2 * rx_pkt_diff > 3 * tx_pkt_diff) {
+			moder_time = priv->rx_usecs_high;
+		} else {
+			if (rate < priv->pkt_rate_low)
+				moder_time = priv->rx_usecs_low;
+			else if (rate > priv->pkt_rate_high)
+				moder_time = priv->rx_usecs_high;
+			else	/* linear interpolation between the low and high points */
+				moder_time = (rate - priv->pkt_rate_low) *
+					(priv->rx_usecs_high - priv->rx_usecs_low) /
+					(priv->pkt_rate_high - priv->pkt_rate_low) +
+					priv->rx_usecs_low;
+		}
+	} else {
+		/* When packet rate is low, use default moderation rather than
+		 * 0 to prevent interrupt storms if traffic suddenly increases */
+		moder_time = priv->rx_usecs;
+	}
+
+	mlx4_dbg(INTR, priv, "tx rate:%lu rx_rate:%lu\n",
+		 tx_pkt_diff * HZ / period, rx_pkt_diff * HZ / period);
+
+	mlx4_dbg(INTR, priv, "Rx moder_time changed from:%d to %d period:%lu "
+		 "[jiff] packets:%lu avg_pkt_size:%lu rate:%lu [p/s])\n",
+		 priv->last_moder_time, moder_time, period, packets,
+		 avg_pkt_size, rate);
+
+	if (moder_time != priv->last_moder_time) {	/* write to the CQs only on a change */
+		priv->last_moder_time = moder_time;
+		for (i = 0; i < priv->rx_ring_num; i++) {
+			cq = &priv->rx_cq[i];
+			cq->moder_time = moder_time;
+			err = mlx4_en_set_cq_moder(priv, cq);
+			if (err) {	/* stop at the first failure; remaining CQs are not updated */
+				mlx4_err(mdev, "Failed modifying moderation for cq:%d "
+					 "on port:%d\n", i, priv->port);
+				break;
+			}
+		}
+	}
+
+out:	/* remember this sample as the baseline for the next pass */
+	priv->last_moder_packets = rx_packets;
+	priv->last_moder_tx_packets = tx_packets;
+	priv->last_moder_bytes = rx_bytes;
+	priv->last_moder_jiffies = jiffies;
+}
+
+static void mlx4_en_do_get_stats(struct work_struct *work)
+{	/* stats_task handler: dump port stats, run auto-moderation, then re-arm itself */
+	struct delayed_work *delay = container_of(work, struct delayed_work, work);
+	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
+						 stats_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);	/* issued before state_lock is taken */
+	if (err)
+		mlx4_dbg(HW, priv, "Could not update stats for "
+				   "port:%d\n", priv->port);
+
+	mutex_lock(&mdev->state_lock);
+	if (mdev->device_up) {	/* the task stops re-arming once the card is marked down */
+		if (priv->port_up)
+			mlx4_en_auto_moderation(priv);
+
+		queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+	}
+	mutex_unlock(&mdev->state_lock);
+}
+
+static void mlx4_en_linkstate(struct work_struct *work)
+{	/* linkstate_task handler: mirror priv->link_state into the netdev carrier */
+	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+						 linkstate_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int linkstate = priv->link_state;	/* sampled before state_lock is taken */
+
+	mutex_lock(&mdev->state_lock);
+	/* If observable port state changed set carrier state and
+	 * report to system log */
+	if (priv->last_link_state != linkstate) {
+		if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
+			if (netif_msg_link(priv))
+				mlx4_info(mdev, "Port %d - link down\n", priv->port);
+			netif_carrier_off(priv->dev);
+		} else {	/* any value other than PORT_DOWN is treated as link up */
+			if (netif_msg_link(priv))
+				mlx4_info(mdev, "Port %d - link up\n", priv->port);
+			netif_carrier_on(priv->dev);
+		}
+	}
+	priv->last_link_state = linkstate;
+	mutex_unlock(&mdev->state_lock);
+}
+
+
+static int mlx4_en_start_port(struct net_device *dev)
+{	/* Bring the port up; returns 0, or an error after unwinding all setup */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_cq *cq;
+	struct mlx4_en_tx_ring *tx_ring;
+	struct mlx4_en_rx_ring *rx_ring;
+	int rx_index = 0;	/* number of Rx CQs activated so far */
+	int tx_index = 0;	/* number of Tx CQ/ring pairs activated so far */
+	u16 stride;
+	int err = 0;
+	int i;
+	int j;
+
+	if (priv->port_up) {	/* already up: report success without touching HW */
+		mlx4_dbg(DRV, priv, "start port called while port already up\n");
+		return 0;
+	}
+
+	/* Clamp the MTU to the port maximum, then calculate Rx buf size */
+	dev->mtu = min(dev->mtu, priv->max_mtu);
+	mlx4_en_calc_rx_buf(dev);
+	mlx4_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size);
+	stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+				    DS_SIZE * priv->num_frags);
+	/* Configure rx cq's and rings */
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		cq = &priv->rx_cq[i];
+		rx_ring = &priv->rx_ring[i];
+
+		err = mlx4_en_activate_cq(priv, cq);
+		if (err) {
+			mlx4_err(mdev, "Failed activating Rx CQ\n");
+			goto cq_err;	/* Rx rings are not active yet: unwind CQs only */
+		}
+		for (j = 0; j < cq->size; j++)
+			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+		err = mlx4_en_set_cq_moder(priv, cq);
+		if (err) {
+			mlx4_err(mdev, "Failed setting cq moderation parameters");
+			mlx4_en_deactivate_cq(priv, cq);	/* this CQ is not counted in rx_index */
+			goto cq_err;
+		}
+		mlx4_en_arm_cq(priv, cq);
+
+		++rx_index;
+	}
+
+	err = mlx4_en_activate_rx_rings(priv);
+	if (err) {
+		mlx4_err(mdev, "Failed to activate RX rings\n");
+		goto cq_err;
+	}
+
+	err = mlx4_en_config_rss_steer(priv);
+	if (err) {
+		mlx4_err(mdev, "Failed configuring rss steering\n");
+		goto rx_err;
+	}
+
+	/* Configure tx cq's and rings */
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		/* Configure cq */
+		cq = &priv->tx_cq[i];
+		err = mlx4_en_activate_cq(priv, cq);
+		if (err) {
+			mlx4_err(mdev, "Failed allocating Tx CQ\n");
+			goto tx_err;
+		}
+		err = mlx4_en_set_cq_moder(priv, cq);
+		if (err) {
+			mlx4_err(mdev, "Failed setting cq moderation parameters");
+			mlx4_en_deactivate_cq(priv, cq);	/* not counted in tx_index */
+			goto tx_err;
+		}
+		mlx4_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i);
+		cq->buf->wqe_index = cpu_to_be16(0xffff);
+
+		/* Configure ring */
+		tx_ring = &priv->tx_ring[i];
+		err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn,
+					       priv->rx_ring[0].srq.srqn);
+		if (err) {
+			mlx4_err(mdev, "Failed allocating Tx ring\n");
+			mlx4_en_deactivate_cq(priv, cq);	/* not counted in tx_index */
+			goto tx_err;
+		}
+		/* Set initial ownership of all Tx TXBBs to SW (1) */
+		for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
+			*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
+		++tx_index;
+	}
+
+	/* Configure port */
+	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+				    priv->rx_skb_size + ETH_FCS_LEN,
+				    mdev->profile.tx_pause,
+				    mdev->profile.tx_ppp,
+				    mdev->profile.rx_pause,
+				    mdev->profile.rx_ppp);
+	if (err) {
+		mlx4_err(mdev, "Failed setting port general configurations"
+			       " for port %d, with error %d\n", priv->port, err);
+		goto tx_err;
+	}
+	/* Set default qp number */
+	err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0);
+	if (err) {
+		mlx4_err(mdev, "Failed setting default qp numbers\n");
+		goto tx_err;
+	}
+	/* Set port mac number */
+	mlx4_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
+	err = mlx4_register_mac(mdev->dev, priv->port,
+				priv->mac, &priv->mac_index);
+	if (err) {
+		mlx4_err(mdev, "Failed setting port mac\n");
+		goto tx_err;
+	}
+
+	/* Init port */
+	mlx4_dbg(HW, priv, "Initializing port\n");
+	err = mlx4_INIT_PORT(mdev->dev, priv->port);
+	if (err) {
+		mlx4_err(mdev, "Failed Initializing port\n");
+		goto mac_err;
+	}
+
+	/* Schedule multicast task to populate multicast list */
+	queue_work(mdev->workqueue, &priv->mcast_task);
+
+	priv->port_up = true;
+	netif_start_queue(dev);
+	return 0;
+
+mac_err:	/* error unwinding: each label falls through to the ones below it */
+	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+tx_err:
+	while (tx_index--) {
+		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
+		mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]);
+	}
+
+	mlx4_en_release_rss_steer(priv);
+rx_err:	/* reached only after mlx4_en_activate_rx_rings() succeeded */
+	for (i = 0; i < priv->rx_ring_num; i++)
+		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
+cq_err:	/* only the first rx_index Rx CQs were activated */
+	while (rx_index--)
+		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
+
+	return err; /* need to close devices */
+}
+
+
+static void mlx4_en_stop_port(struct net_device *dev)
+{	/* Take the port down: close it, drop the MAC, deactivate all rings and CQs */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int i;
+
+	if (!priv->port_up) {	/* already down: nothing to undo */
+		mlx4_dbg(DRV, priv, "stop port (%d) called while port already down\n",
+			 priv->port);
+		return;
+	}
+	netif_stop_queue(dev);
+
+	/* Synchronize with tx routine */
+	netif_tx_lock_bh(dev);
+	priv->port_up = false;
+	netif_tx_unlock_bh(dev);
+
+	/* close port*/
+	mlx4_CLOSE_PORT(mdev->dev, priv->port);
+
+	/* Unregister Mac address for the port */
+	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+
+	/* Free TX Rings */
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
+		mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]);
+	}
+	msleep(10);	/* NOTE(review): presumably lets in-flight Tx completions drain - confirm */
+
+	for (i = 0; i < priv->tx_ring_num; i++)
+		mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]);
+
+	/* Free RSS qps */
+	mlx4_en_release_rss_steer(priv);
+
+	/* Free RX Rings */
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
+		while (test_bit(NAPI_STATE_SCHED, &priv->rx_cq[i].napi.state))
+			msleep(1);	/* wait until NAPI is no longer scheduled on this CQ */
+		mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]);
+	}
+}
+
+static void mlx4_en_restart(struct work_struct *work)
+{	/* watchdog_task handler: stop the port and start it again */
+	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+						 watchdog_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->dev;
+
+	mlx4_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port);
+	mlx4_en_stop_port(dev);	/* NOTE(review): runs without state_lock, unlike open/close/change_mtu - confirm */
+	if (mlx4_en_start_port(dev))	/* failure is only logged; no retry is queued here */
+	    mlx4_err(mdev, "Failed restarting port %d\n", priv->port);
+}
+
+
+static int mlx4_en_open(struct net_device *dev)
+{	/* open hook: reset counters and start the port; -EBUSY if the card is down */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int i;
+	int err = 0;
+
+	mutex_lock(&mdev->state_lock);
+
+	if (!mdev->device_up) {
+		mlx4_err(mdev, "Cannot open - device down/disabled\n");
+		err = -EBUSY;
+		goto out;
+	}
+
+	/* Reset HW statistics and performance counters */
+	if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
+		mlx4_dbg(HW, priv, "Failed dumping statistics\n");	/* not fatal: open continues */
+
+	memset(&priv->stats, 0, sizeof(priv->stats));
+	memset(&priv->pstats, 0, sizeof(priv->pstats));
+
+	for (i = 0; i < priv->tx_ring_num; i++) {	/* zero the per-ring SW counters too */
+		priv->tx_ring[i].bytes = 0;
+		priv->tx_ring[i].packets = 0;
+	}
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		priv->rx_ring[i].bytes = 0;
+		priv->rx_ring[i].packets = 0;
+	}
+
+	mlx4_en_set_default_moderation(priv);
+	err = mlx4_en_start_port(dev);
+	if (err)
+		mlx4_err(mdev, "Failed starting port:%d\n", priv->port);
+
+out:
+	mutex_unlock(&mdev->state_lock);
+	return err;	/* 0, -EBUSY, or the mlx4_en_start_port() error */
+}
+
+
+static int mlx4_en_close(struct net_device *dev)
+{	/* stop hook: stop the port under state_lock and drop the carrier; returns 0 */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	if (netif_msg_ifdown(priv))
+		mlx4_info(mdev, "Close called for port:%d\n", priv->port);
+
+	mutex_lock(&mdev->state_lock);
+
+	mlx4_en_stop_port(dev);	/* returns early if the port is already down */
+	netif_carrier_off(dev);
+
+	mutex_unlock(&mdev->state_lock);
+	return 0;
+}
+
+static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
+{	/* Destroy every Tx/Rx ring and CQ that was actually created */
+	int i;
+
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		if (priv->tx_ring[i].tx_info)	/* non-NULL is taken to mean the ring exists */
+			mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
+		if (priv->tx_cq[i].buf)	/* non-NULL is taken to mean the CQ exists */
+			mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+	}
+
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		if (priv->rx_ring[i].rx_info)
+			mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]);
+		if (priv->rx_cq[i].buf)
+			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
+	}
+}
+
+static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
+{	/* Create a CQ and a ring per Tx and Rx queue; returns 0 or -ENOMEM */
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile *prof = priv->prof;
+	int i;
+
+	/* Create tx Rings */
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
+				      prof->tx_ring_size, i, TX))
+			goto err;
+
+		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
+					   prof->tx_ring_size, TXBB_SIZE))
+			goto err;
+	}
+
+	/* Create rx Rings */
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
+				      prof->rx_ring_size, i, RX))
+			goto err;
+
+		if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
+					   prof->rx_ring_size, priv->stride))
+			goto err;
+	}
+
+	return 0;
+
+err:	/* nothing is freed here; the caller frees via mlx4_en_destroy_netdev() */
+	mlx4_err(mdev, "Failed to allocate NIC resources\n");
+	return -ENOMEM;	/* reported for any creation failure, whatever its cause */
+}
+
+
+void mlx4_en_destroy_netdev(struct net_device *dev)
+{	/* Tear down a netdev in any setup stage (also the init failure path) */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	mlx4_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
+
+	/* Unregister device - this will close the port if it was up */
+	if (priv->registered)	/* set only after register_netdev() succeeded */
+		unregister_netdev(dev);
+
+	if (priv->allocated)	/* set only after mlx4_alloc_hwq_res() succeeded */
+		mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE);
+
+	cancel_delayed_work(&priv->stats_task);
+	cancel_delayed_work(&priv->refill_task);
+	/* flush any pending task for this netdev */
+	flush_workqueue(mdev->workqueue);
+
+	/* Detach the netdev so tasks would not attempt to access it */
+	mutex_lock(&mdev->state_lock);
+	mdev->pndev[priv->port] = NULL;
+	mutex_unlock(&mdev->state_lock);
+
+	mlx4_en_free_resources(priv);
+	free_netdev(dev);	/* priv lives inside dev, so it is invalid from here on */
+}
+
+static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
+{	/* change_mtu hook: range-check the MTU, store it, restart a running port */
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err = 0;
+
+	mlx4_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
+		 dev->mtu, new_mtu);
+
+	if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) {
+		mlx4_err(mdev, "Bad MTU size:%d.\n", new_mtu);
+		return -EPERM;	/* the only error this function returns */
+	}
+	dev->mtu = new_mtu;
+
+	if (netif_running(dev)) {
+		mutex_lock(&mdev->state_lock);
+		if (!mdev->device_up) {
+			/* NIC is probably restarting - let watchdog task reset
+			 * the port */
+			mlx4_dbg(DRV, priv, "Change MTU called with card down!?\n");
+		} else {
+			mlx4_en_stop_port(dev);
+			mlx4_en_set_default_moderation(priv);	/* the defaults depend on the MTU */
+			err = mlx4_en_start_port(dev);
+			if (err) {	/* not propagated: the watchdog task retries the restart */
+				mlx4_err(mdev, "Failed restarting port:%d\n",
+					 priv->port);
+				queue_work(mdev->workqueue, &priv->watchdog_task);
+			}
+		}
+		mutex_unlock(&mdev->state_lock);
+	}
+	return 0;	/* 0 even when the restart above failed */
+}
+
+int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
+			struct mlx4_en_port_profile *prof)
+{	/* Allocate, set up and register the netdev for one port; 0 or -errno */
+	struct net_device *dev;
+	struct mlx4_en_priv *priv;
+	int i;
+	int err;
+
+	dev = alloc_etherdev(sizeof(struct mlx4_en_priv));
+	if (dev == NULL) {
+		mlx4_err(mdev, "Net device allocation failed\n");
+		return -ENOMEM;	/* nothing to undo yet, so no goto out */
+	}
+
+	SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev);
+
+	/*
+	 * Initialize driver private data
+	 */
+
+	priv = netdev_priv(dev);
+	memset(priv, 0, sizeof(struct mlx4_en_priv));
+	priv->dev = dev;
+	priv->mdev = mdev;
+	priv->prof = prof;
+	priv->port = port;
+	priv->port_up = false;
+	priv->rx_csum = 1;
+	priv->flags = prof->flags;
+	priv->tx_ring_num = prof->tx_ring_num;
+	priv->rx_ring_num = prof->rx_ring_num;
+	priv->mc_list = NULL;
+	priv->mac_index = -1;
+	priv->msg_enable = MLX4_EN_MSG_LEVEL;
+	spin_lock_init(&priv->stats_lock);
+	INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast);	/* all work items are set up before any goto out */
+	INIT_WORK(&priv->mac_task, mlx4_en_do_set_mac);
+	INIT_DELAYED_WORK(&priv->refill_task, mlx4_en_rx_refill);
+	INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
+	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+
+	/* Query for default mac and max mtu */
+	priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
+	priv->mac = mdev->dev->caps.def_mac[priv->port];
+	if (ILLEGAL_MAC(priv->mac)) {
+		mlx4_err(mdev, "Port: %d, invalid mac burned: 0x%llx, quiting\n",
+			 priv->port, priv->mac);
+		err = -EINVAL;
+		goto out;
+	}
+
+	priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+					  DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+	err = mlx4_en_alloc_resources(priv);
+	if (err)
+		goto out;
+
+	/* Populate Rx default RSS mappings */
+	mlx4_en_set_default_rss_map(priv, &priv->rss_map, priv->rx_ring_num *
+						RSS_FACTOR, priv->rx_ring_num);
+	/* Allocate page for receive rings */
+	err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
+				MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
+	if (err) {
+		mlx4_err(mdev, "Failed to allocate page for rx qps\n");
+		goto out;
+	}
+	priv->allocated = 1;	/* tells mlx4_en_destroy_netdev() to free priv->res */
+
+	/* Populate Tx priority mappings */
+	mlx4_en_set_prio_map(priv, priv->tx_prio_map, prof->tx_ring_num);
+
+	/*
+	 * Initialize netdev entry points
+	 */
+
+	dev->open = &mlx4_en_open;
+	dev->stop = &mlx4_en_close;
+	dev->hard_start_xmit = &mlx4_en_xmit;
+	dev->get_stats = &mlx4_en_get_stats;
+	dev->set_multicast_list = &mlx4_en_set_multicast;
+	dev->set_mac_address = &mlx4_en_set_mac;
+	dev->change_mtu = &mlx4_en_change_mtu;
+	dev->tx_timeout = &mlx4_en_tx_timeout;
+	dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT;
+	dev->vlan_rx_register = mlx4_en_vlan_rx_register;
+	dev->vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid;
+	dev->vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	dev->poll_controller = mlx4_en_netpoll;
+#endif
+	SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops);
+
+	/* Set default MAC: unpack priv->mac, lowest byte into the last slot */
+	dev->addr_len = ETH_ALEN;
+	for (i = 0; i < ETH_ALEN; i++)
+		dev->dev_addr[ETH_ALEN - 1 - i] =
+		(u8) (priv->mac >> (8 * i));
+
+	/*
+	 * Set driver features
+	 */
+	dev->features |= NETIF_F_SG;
+	dev->features |= NETIF_F_HW_CSUM;
+	dev->features |= NETIF_F_HIGHDMA;
+	dev->features |= NETIF_F_HW_VLAN_TX |
+			 NETIF_F_HW_VLAN_RX |
+			 NETIF_F_HW_VLAN_FILTER;
+	if (mdev->profile.num_lro)	/* LRO only when the profile asks for it */
+		dev->features |= NETIF_F_LRO;
+	if (mdev->LSO_support) {
+		dev->features |= NETIF_F_TSO;
+		dev->features |= NETIF_F_TSO6;
+	}
+
+	mdev->pndev[port] = dev;	/* published before registration; cleared again on failure */
+
+	netif_carrier_off(dev);
+	err = register_netdev(dev);
+	if (err) {
+		mlx4_err(mdev, "Netdev registration failed\n");
+		goto out;
+	}
+	priv->registered = 1;	/* tells mlx4_en_destroy_netdev() to unregister */
+	queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+	return 0;
+
+out:	/* single failure exit: destroy_netdev copes with a partially set up device */
+	mlx4_en_destroy_netdev(dev);
+	return err;
+}
+
diff --git a/drivers/net/mlx4/en_params.c b/drivers/net/mlx4/en_params.c
new file mode 100644
index 00000000000..c2e69b1bcd0
--- /dev/null
+++ b/drivers/net/mlx4/en_params.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+#include "mlx4_en.h"
+#include "en_port.h"
+
+/* Declare read-only (0444) unsigned int module parameter X, default def_val */
+#define MLX4_EN_PARM_INT(X, def_val, desc) \
+	static unsigned int X = def_val;\
+	module_param(X , uint, 0444); \
+	MODULE_PARM_DESC(X, desc);
+
+/*
+ * Device scope module parameters
+ */
+
+
+/* Use an XOR rather than a Toeplitz hash function for RSS */
+MLX4_EN_PARM_INT(rss_xor, 0, "Use XOR hash function for RSS");
+
+/* RSS hash type mask - default to <saddr, daddr, sport, dport> */
+MLX4_EN_PARM_INT(rss_mask, 0xf, "RSS hash type bitmask");
+
+/* Number of LRO sessions per Rx ring (rounded up to a power of two) */
+MLX4_EN_PARM_INT(num_lro, MLX4_EN_MAX_LRO_DESCRIPTORS,
+		 "Number of LRO sessions per ring or disabled (0)");
+
+/* Priority pausing */
+MLX4_EN_PARM_INT(pptx, MLX4_EN_DEF_TX_PAUSE,
+		 "Pause policy on TX: 0 never generate pause frames "
+		 "1 generate pause frames according to RX buffer threshold");
+MLX4_EN_PARM_INT(pprx, MLX4_EN_DEF_RX_PAUSE,
+		 "Pause policy on RX: 0 ignore received pause frames "
+		 "1 respect received pause frames");
+MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]."
+			   " Per priority bit mask");
+MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]."
+			   " Per priority bit mask");
+
+/* Interrupt moderation tuning */
+MLX4_EN_PARM_INT(rx_moder_cnt, MLX4_EN_AUTO_CONF,
+	       "Max coalesced descriptors for Rx interrupt moderation");
+MLX4_EN_PARM_INT(rx_moder_time, MLX4_EN_AUTO_CONF,
+	       "Timeout following last packet for Rx interrupt moderation");
+MLX4_EN_PARM_INT(auto_moder, 1, "Enable dynamic interrupt moderation");
+
+MLX4_EN_PARM_INT(rx_ring_num1, 0, "Number or Rx rings for port 1 (0 = #cores)");
+MLX4_EN_PARM_INT(rx_ring_num2, 0, "Number or Rx rings for port 2 (0 = #cores)");
+
+MLX4_EN_PARM_INT(tx_ring_size1, MLX4_EN_AUTO_CONF, "Tx ring size for port 1");
+MLX4_EN_PARM_INT(tx_ring_size2, MLX4_EN_AUTO_CONF, "Tx ring size for port 2");
+MLX4_EN_PARM_INT(rx_ring_size1, MLX4_EN_AUTO_CONF, "Rx ring size for port 1");
+MLX4_EN_PARM_INT(rx_ring_size2, MLX4_EN_AUTO_CONF, "Rx ring size for port 2");
+
+
+/*
+ * Resolve a ring-size parameter: MLX4_EN_AUTO_CONF becomes @def (stored back
+ * in @size); the result is at least @min and rounded up to a power of two.
+ */
+static u32 mlx4_en_ring_size(unsigned int *size, u32 def, u32 min)
+{
+	if (*size == MLX4_EN_AUTO_CONF)
+		*size = def;
+	return (*size < min) ? min : roundup_pow_of_two(*size);
+}
+
+/*
+ * Fill mdev->profile from the module parameters; always returns 0.  Clamping
+ * is done unsigned so an oversized parameter cannot turn negative.
+ */
+int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
+{
+	struct mlx4_en_profile *params = &mdev->profile;
+
+	params->rx_moder_cnt = min_t(u32, rx_moder_cnt, MLX4_EN_AUTO_CONF);
+	params->rx_moder_time = min_t(u32, rx_moder_time, MLX4_EN_AUTO_CONF);
+	params->auto_moder = auto_moder;
+	params->rss_xor = (rss_xor != 0);
+	params->rss_mask = rss_mask & 0x1f;
+	params->num_lro = min_t(u32, num_lro, MLX4_EN_MAX_LRO_DESCRIPTORS);
+	params->rx_pause = pprx;
+	params->rx_ppp = pfcrx;
+	params->tx_pause = pptx;
+	params->tx_ppp = pfctx;
+	if (params->rx_ppp || params->tx_ppp) {
+		params->prof[1].tx_ring_num = MLX4_EN_TX_RING_NUM;
+		params->prof[2].tx_ring_num = MLX4_EN_TX_RING_NUM;
+	} else {
+		params->prof[1].tx_ring_num = 1;
+		params->prof[2].tx_ring_num = 1;
+	}
+	params->prof[1].rx_ring_num = min_t(u32, rx_ring_num1, MAX_RX_RINGS);
+	params->prof[2].rx_ring_num = min_t(u32, rx_ring_num2, MAX_RX_RINGS);
+
+	params->prof[1].tx_ring_size = mlx4_en_ring_size(&tx_ring_size1,
+			MLX4_EN_DEF_TX_RING_SIZE, MLX4_EN_MIN_TX_SIZE);
+	params->prof[2].tx_ring_size = mlx4_en_ring_size(&tx_ring_size2,
+			MLX4_EN_DEF_TX_RING_SIZE, MLX4_EN_MIN_TX_SIZE);
+	params->prof[1].rx_ring_size = mlx4_en_ring_size(&rx_ring_size1,
+			MLX4_EN_DEF_RX_RING_SIZE, MLX4_EN_MIN_RX_SIZE);
+	params->prof[2].rx_ring_size = mlx4_en_ring_size(&rx_ring_size2,
+			MLX4_EN_DEF_RX_RING_SIZE, MLX4_EN_MIN_RX_SIZE);
+	return 0;
+}
+
+
+/*
+ * Ethtool support
+ */
+
+static void mlx4_en_update_lro_stats(struct mlx4_en_priv *priv)
+{
+	int ring;
+
+	/* Recompute the port-wide LRO counters as the sum over all Rx rings */
+	priv->port_stats.lro_no_desc = 0;
+	priv->port_stats.lro_flushed = 0;
+	priv->port_stats.lro_aggregated = 0;
+	for (ring = 0; ring < priv->rx_ring_num; ring++) {
+		priv->port_stats.lro_no_desc += priv->rx_ring[ring].lro.stats.no_desc;
+		priv->port_stats.lro_flushed += priv->rx_ring[ring].lro.stats.flushed;
+		priv->port_stats.lro_aggregated += priv->rx_ring[ring].lro.stats.aggregated;
+	}
+}
+
+static void
+mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_dev *mlx4dev = priv->mdev->dev;
+	u64 fw = mlx4dev->caps.fw_ver;
+
+	/* Bounded, NUL-terminated copies: the string fields are 32 bytes each */
+	snprintf(drvinfo->driver, 32, DRV_NAME " (%s)", mlx4dev->board_id);
+	strlcpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", 32);
+	snprintf(drvinfo->fw_version, 32, "%d.%d.%d",
+		 (u16) (fw >> 32), (u16) (fw >> 16), (u16) fw);
+	strlcpy(drvinfo->bus_info, pci_name(mlx4dev->pdev), 32);
+	drvinfo->n_stats = 0;
+	drvinfo->regdump_len = 0;
+	drvinfo->eedump_len = 0;
+}
+
+static u32 mlx4_en_get_tso(struct net_device *dev)
+{
+	return !!(dev->features & NETIF_F_TSO);	/* 1 if the TSO flag is set */
+}
+
+static int mlx4_en_set_tso(struct net_device *dev, u32 data)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (data && !priv->mdev->LSO_support)
+		return -EPERM;	/* cannot enable TSO without LSO support */
+	if (data)
+		dev->features |= NETIF_F_TSO | NETIF_F_TSO6;
+	else
+		dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
+	return 0;
+}
+
+static u32 mlx4_en_get_rx_csum(struct net_device *dev)
+{
+	/* Report the rx_csum flag kept in the driver's private data */
+	return ((struct mlx4_en_priv *) netdev_priv(dev))->rx_csum;
+}
+
+static int mlx4_en_set_rx_csum(struct net_device *dev, u32 data)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(dev);
+	en_priv->rx_csum = !!data;	/* any non-zero request is stored as 1 */
+	return 0;
+}
+
+static const char main_strings[][ETH_GSTRING_LEN] = {
+	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
+	"rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+	"tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+	"tx_heartbeat_errors", "tx_window_errors",
+
+	/* port statistics (NUM_PORT_STATS names, see mlx4_en_get_strings()) */
+	"lro_aggregated", "lro_flushed", "lro_no_desc", "tso_packets",
+	"queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed",
+	"rx_csum_good", "rx_csum_none", "tx_chksum_offload",
+
+	/* packet statistics (NUM_PKT_STATS names, see mlx4_en_get_strings()) */
+	"broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3",
+	"rx_prio_4", "rx_prio_5", "rx_prio_6", "rx_prio_7", "tx_prio_0",
+	"tx_prio_1", "tx_prio_2", "tx_prio_3", "tx_prio_4", "tx_prio_5",
+	"tx_prio_6", "tx_prio_7",
+};
+#define NUM_MAIN_STATS	21	/* names listed before the port statistics */
+#define NUM_ALL_STATS	(NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS)
+/* ethtool get_msglevel hook: return the msg_enable value held in priv */
+static u32 mlx4_en_get_msglevel(struct net_device *dev)
+{
+	return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable;
+}
+/* ethtool set_msglevel hook: store @val as the new msg_enable value */
+static void mlx4_en_set_msglevel(struct net_device *dev, u32 val)
+{
+	((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val;
+}
+
+static void mlx4_en_get_wol(struct net_device *netdev,
+			    struct ethtool_wolinfo *wol)
+{
+	/* Wake-on-LAN is not offered: nothing is reported as supported ... */
+	wol->supported = 0;
+	/* ... and nothing is reported as enabled */
+	wol->wolopts = 0;
+}
+
+static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+	/* Fixed counters plus a packets and a bytes counter per Tx/Rx ring */
+	return NUM_ALL_STATS + (priv->tx_ring_num + priv->rx_ring_num) * 2;
+}
+
+static void mlx4_en_get_ethtool_stats(struct net_device *dev,
+		struct ethtool_stats *stats, uint64_t *data)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int index = 0;
+	int i;
+
+	spin_lock_bh(&priv->stats_lock);
+	/* Refresh the summed LRO counters before port_stats is copied out */
+	mlx4_en_update_lro_stats(priv);
+	/* Emit values in the same order as the names in mlx4_en_get_strings() */
+	for (i = 0; i < NUM_MAIN_STATS; i++)
+		data[index++] = ((unsigned long *) &priv->stats)[i];
+	for (i = 0; i < NUM_PORT_STATS; i++)
+		data[index++] = ((unsigned long *) &priv->port_stats)[i];
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		data[index++] = priv->tx_ring[i].packets;
+		data[index++] = priv->tx_ring[i].bytes;
+	}
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		data[index++] = priv->rx_ring[i].packets;
+		data[index++] = priv->rx_ring[i].bytes;
+	}
+	for (i = 0; i < NUM_PKT_STATS; i++)
+		data[index++] = ((unsigned long *) &priv->pkstats)[i];
+	spin_unlock_bh(&priv->stats_lock);
+
+}
+
+static void mlx4_en_get_strings(struct net_device *dev,
+				uint32_t stringset, uint8_t *data)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int index = 0;
+	int i;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+	/* Names follow the order mlx4_en_get_ethtool_stats() emits values in */
+	/* Add main counters; each name fills one ETH_GSTRING_LEN slot of data */
+	for (i = 0; i < NUM_MAIN_STATS; i++)
+		strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]);
+	for (i = 0; i < NUM_PORT_STATS; i++)
+		strcpy(data + (index++) * ETH_GSTRING_LEN,
+			main_strings[i + NUM_MAIN_STATS]);
+	for (i = 0; i < priv->tx_ring_num; i++) {
+		sprintf(data + (index++) * ETH_GSTRING_LEN,
+			"tx%d_packets", i);
+		sprintf(data + (index++) * ETH_GSTRING_LEN,
+			"tx%d_bytes", i);
+	}
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		sprintf(data + (index++) * ETH_GSTRING_LEN,
+			"rx%d_packets", i);
+		sprintf(data + (index++) * ETH_GSTRING_LEN,
+			"rx%d_bytes", i);
+	}
+	for (i = 0; i < NUM_PKT_STATS; i++)
+		strcpy(data + (index++) * ETH_GSTRING_LEN,
+			main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]);
+}
+
+static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	int link_up = netif_carrier_ok(dev);
+
+	/* Fixed link mode: 10G full duplex without autonegotiation */
+	cmd->autoneg = AUTONEG_DISABLE;
+	cmd->supported = SUPPORTED_10000baseT_Full;
+	/* advertising takes ADVERTISED_* flags, not SUPPORTED_* ones */
+	cmd->advertising = ADVERTISED_10000baseT_Full;
+	/* Without carrier, speed and duplex are reported as -1 */
+	cmd->speed = link_up ? SPEED_10000 : -1;
+	cmd->duplex = link_up ? DUPLEX_FULL : -1;
+	return 0;
+}
+
+static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	/* Accept only the fixed configuration: no autoneg, 10G, full duplex */
+	if (cmd->autoneg != AUTONEG_ENABLE &&
+	    cmd->speed == SPEED_10000 && cmd->duplex == DUPLEX_FULL)
+		return 0;	/* nothing to change */
+
+	return -EINVAL;
+}
+
+static int mlx4_en_get_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *coal)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	coal->tx_coalesce_usecs = 0;	/* Tx values are always reported as 0 */
+	coal->tx_max_coalesced_frames = 0;
+	coal->rx_coalesce_usecs = priv->rx_usecs;
+	coal->rx_max_coalesced_frames = priv->rx_frames;
+	/* Adaptive Rx moderation parameters, as stored by set_coalesce */
+	coal->pkt_rate_low = priv->pkt_rate_low;
+	coal->rx_coalesce_usecs_low = priv->rx_usecs_low;
+	coal->pkt_rate_high = priv->pkt_rate_high;
+	coal->rx_coalesce_usecs_high = priv->rx_usecs_high;
+	coal->rate_sample_interval = priv->sample_interval;
+	coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal;
+	return 0;
+}
+
+static int mlx4_en_set_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *coal)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int err, i;
+
+	priv->rx_frames = (coal->rx_max_coalesced_frames ==
+			   MLX4_EN_AUTO_CONF) ?
+				MLX4_EN_RX_COAL_TARGET /
+				priv->dev->mtu + 1 :
+				coal->rx_max_coalesced_frames;
+	priv->rx_usecs = (coal->rx_coalesce_usecs ==
+			  MLX4_EN_AUTO_CONF) ?
+				MLX4_EN_RX_COAL_TIME :
+				coal->rx_coalesce_usecs;
+
+	/* Set adaptive coalescing params */
+	priv->pkt_rate_low = coal->pkt_rate_low;
+	priv->rx_usecs_low = coal->rx_coalesce_usecs_low;
+	priv->pkt_rate_high = coal->pkt_rate_high;
+	priv->rx_usecs_high = coal->rx_coalesce_usecs_high;
+	priv->sample_interval = coal->rate_sample_interval;
+	priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce;
+	priv->last_moder_time = MLX4_EN_AUTO_CONF;
+	if (priv->adaptive_rx_coal)
+		return 0;
+	/* Static moderation only: program every Rx CQ with the new values */
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		priv->rx_cq[i].moder_cnt = priv->rx_frames;
+		priv->rx_cq[i].moder_time = priv->rx_usecs;
+		err = mlx4_en_set_cq_moder(priv, &priv->rx_cq[i]);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int mlx4_en_set_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *pause)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err;
+
+	/* Only the global pause flags change; the per-priority masks are kept */
+	mdev->profile.tx_pause = pause->tx_pause != 0;
+	mdev->profile.rx_pause = pause->rx_pause != 0;
+	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+				    priv->rx_skb_size + ETH_FCS_LEN,
+				    mdev->profile.tx_pause,
+				    mdev->profile.tx_ppp,
+				    mdev->profile.rx_pause,
+				    mdev->profile.rx_ppp);
+	if (err)
+		mlx4_err(mdev, "Failed setting pause params\n");
+	return err;
+}
+
+static void mlx4_en_get_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	/* The pause settings are read from the device-wide profile */
+	pause->rx_pause = priv->mdev->profile.rx_pause;
+	pause->tx_pause = priv->mdev->profile.tx_pause;
+}
+
+static void mlx4_en_get_ringparam(struct net_device *dev,
+				  struct ethtool_ringparam *param)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	/* NOTE(review): maxima come from the max_rq_sg/max_sq_sg caps - confirm */
+	memset(param, 0, sizeof(*param));
+	param->tx_max_pending = priv->mdev->dev->caps.max_sq_sg;
+	param->rx_max_pending = priv->mdev->dev->caps.max_rq_sg;
+	param->tx_pending = priv->mdev->profile.prof[priv->port].tx_ring_size;
+	param->rx_pending = priv->mdev->profile.prof[priv->port].rx_ring_size;
+}
+
+/* ethtool entry points; installed on the netdev with SET_ETHTOOL_OPS() */
+const struct ethtool_ops mlx4_en_ethtool_ops = {
+	.get_drvinfo = mlx4_en_get_drvinfo,
+	.get_settings = mlx4_en_get_settings,
+	.set_settings = mlx4_en_set_settings,
+#ifdef NETIF_F_TSO
+	.get_tso = mlx4_en_get_tso,
+	.set_tso = mlx4_en_set_tso,
+#endif
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = ethtool_op_set_sg,
+	.get_link = ethtool_op_get_link,
+	.get_rx_csum = mlx4_en_get_rx_csum,
+	.set_rx_csum = mlx4_en_set_rx_csum,
+	.get_tx_csum = ethtool_op_get_tx_csum,
+	/* NOTE(review): netdev sets NETIF_F_HW_CSUM - confirm this helper fits */
+	.set_tx_csum = ethtool_op_set_tx_ipv6_csum,
+	.get_strings = mlx4_en_get_strings,
+	.get_sset_count = mlx4_en_get_sset_count,
+	.get_ethtool_stats = mlx4_en_get_ethtool_stats,
+	.get_wol = mlx4_en_get_wol,
+	.get_msglevel = mlx4_en_get_msglevel,
+	.set_msglevel = mlx4_en_set_msglevel,
+	.get_coalesce = mlx4_en_get_coalesce,
+	.set_coalesce = mlx4_en_set_coalesce,
+	.get_pauseparam = mlx4_en_get_pauseparam,
+	.set_pauseparam = mlx4_en_set_pauseparam,
+	.get_ringparam = mlx4_en_get_ringparam,
+	.get_flags = ethtool_op_get_flags,
+	.set_flags = ethtool_op_set_flags,
+};
+
+
+
+
diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c
new file mode 100644
index 00000000000..c5a4c038975
--- /dev/null
+++ b/drivers/net/mlx4/en_port.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/if_vlan.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "en_port.h"
+#include "mlx4_en.h"
+
+/* Issue MLX4_CMD_SET_MCAST_FLTR for @port; @clear is OR-ed in as bit 63 */
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+			u64 mac, u64 clear, u8 mode)
+{
+	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
+}
+
+/*
+ * Program the VLAN filter of @port: one bit per VLAN id that has a device in
+ * @grp.  Returns 0 or the error from mailbox allocation / the command.
+ */
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, struct vlan_group *grp)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_vlan_fltr_mbox *filter;
+	int i, j;
+	int index = 0;
+	u32 entry;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	filter = mailbox->buf;
+	memset(filter, 0, sizeof(*filter));	/* no group: block all vlans */
+	if (grp) {
+		for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) {
+			entry = 0;
+			for (j = 0; j < 32; j++)
+				if (vlan_group_get_device(grp, index++))
+					entry |= 1U << j; /* 1 << 31 overflows int */
+			filter->entry[i] = cpu_to_be32(entry);
+		}
+	}
+	err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR,
+		       MLX4_CMD_TIME_CLASS_B);
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+/* Send SET_PORT (general) for @port: MTU and pause settings; 0 or an error */
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_general_context *context;
+	int err;
+	u32 in_mod;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);
+
+	context->flags = SET_PORT_GEN_ALL_VALID;
+	context->mtu = cpu_to_be16(mtu);
+	context->pptx = (pptx * (!pfctx)) << 7;	/* bit 7; 0 when pfctx is set */
+	context->pfctx = pfctx;
+	context->pprx = (pprx * (!pfcrx)) << 7;	/* bit 7; 0 when pfcrx is set */
+	context->pfcrx = pfcrx;
+
+	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+/*
+ * Issue MLX4_CMD_SET_PORT (RQP_CALC) for @port; returns 0 or an error code.
+ */
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+			   u8 promisc)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_rqp_calc_context *context;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);	/* intra_* fields stay 0 */
+
+	/* Shift in u32: a signed 1 << 31 would overflow int */
+	context->base_qpn = cpu_to_be32(base_qpn);
+	context->promisc = cpu_to_be32((u32) promisc << SET_PORT_PROMISC_SHIFT |
+				       base_qpn);
+	context->mcast = cpu_to_be32(1U << SET_PORT_PROMISC_SHIFT | base_qpn);
+	context->no_vlan = MLX4_NO_VLAN_IDX;
+	context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_RQP_CALC << 8 | port, 1,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+/* Run DUMP_ETH_STATS for @port and refresh priv->stats and priv->pkstats */
+int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
+{
+	struct mlx4_en_stat_out_mbox *mlx4_en_stats;
+	struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]);
+	struct net_device_stats *stats = &priv->stats;
+	struct mlx4_cmd_mailbox *mailbox;
+	u64 in_mod = reset << 8 | port;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
+	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
+			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
+	if (err)
+		goto out;
+
+	mlx4_en_stats = mailbox->buf;
+	/* Mailbox counters are big-endian; convert them under stats_lock */
+	spin_lock_bh(&priv->stats_lock);
+
+	stats->rx_packets = be32_to_cpu(mlx4_en_stats->RTOTFRMS) -
+			    be32_to_cpu(mlx4_en_stats->RDROP);
+	stats->tx_packets = be64_to_cpu(mlx4_en_stats->TTOT_prio_0) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_1) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_2) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_3) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_4) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_5) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_6) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_prio_7) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_novlan) +
+			    be64_to_cpu(mlx4_en_stats->TTOT_loopbk);
+	stats->rx_bytes = be64_to_cpu(mlx4_en_stats->ROCT_prio_0) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_1) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_2) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_3) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_4) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_5) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_6) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_prio_7) +
+			  be64_to_cpu(mlx4_en_stats->ROCT_novlan);
+
+	stats->tx_bytes = be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_0) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_1) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_2) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_3) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_4) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_5) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_6) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_prio_7) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_novlan) +
+			  be64_to_cpu(mlx4_en_stats->TTTLOCT_loopbk);
+
+	stats->rx_errors = be64_to_cpu(mlx4_en_stats->PCS) +
+			   be32_to_cpu(mlx4_en_stats->RdropLength) +
+			   be32_to_cpu(mlx4_en_stats->RJBBR) +
+			   be32_to_cpu(mlx4_en_stats->RCRC) +
+			   be32_to_cpu(mlx4_en_stats->RRUNT);
+	stats->tx_errors = be32_to_cpu(mlx4_en_stats->TDROP);
+	stats->multicast = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_1) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_2) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_3) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_4) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_5) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_6) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_prio_7) +
+			   be64_to_cpu(mlx4_en_stats->MCAST_novlan);
+	stats->collisions = 0;
+	stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
+	stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
+	stats->rx_frame_errors = 0;
+	stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->tx_aborted_errors = 0;
+	stats->tx_carrier_errors = 0;
+	stats->tx_fifo_errors = 0;
+	stats->tx_heartbeat_errors = 0;
+	stats->tx_window_errors = 0;
+	/* Broadcast and per-priority totals are kept in priv->pkstats */
+	priv->pkstats.broadcast =
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) +
+				be64_to_cpu(mlx4_en_stats->RBCAST_novlan);
+	priv->pkstats.rx_prio[0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0);
+	priv->pkstats.rx_prio[1] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1);
+	priv->pkstats.rx_prio[2] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2);
+	priv->pkstats.rx_prio[3] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3);
+	priv->pkstats.rx_prio[4] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4);
+	priv->pkstats.rx_prio[5] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5);
+	priv->pkstats.rx_prio[6] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6);
+	priv->pkstats.rx_prio[7] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7);
+	priv->pkstats.tx_prio[0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0);
+	priv->pkstats.tx_prio[1] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1);
+	priv->pkstats.tx_prio[2] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2);
+	priv->pkstats.tx_prio[3] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3);
+	priv->pkstats.tx_prio[4] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4);
+	priv->pkstats.tx_prio[5] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5);
+	priv->pkstats.tx_prio[6] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6);
+	priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7);
+	spin_unlock_bh(&priv->stats_lock);
+
+out:
+	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+	return err;
+}
+
diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h
new file mode 100644
index 00000000000..e6477f12beb
--- /dev/null
+++ b/drivers/net/mlx4/en_port.h
@@ -0,0 +1,570 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _MLX4_EN_PORT_H_
+#define _MLX4_EN_PORT_H_
+
+
+#define SET_PORT_GEN_ALL_VALID	0x7	/* flags set by mlx4_SET_PORT_general() */
+#define SET_PORT_PROMISC_SHIFT	31	/* shift used by mlx4_SET_PORT_qpn_calc() */
+
+enum {
+	MLX4_CMD_SET_VLAN_FLTR  = 0x47,	/* issued by mlx4_SET_VLAN_FLTR() */
+	MLX4_CMD_SET_MCAST_FLTR = 0x48,	/* issued by mlx4_SET_MCAST_FLTR() */
+	MLX4_CMD_DUMP_ETH_STATS = 0x49,	/* issued by mlx4_en_DUMP_ETH_STATS() */
+};
+
+struct mlx4_set_port_general_context {
+	u8 reserved[3];
+	u8 flags;		/* SET_PORT_GEN_ALL_VALID */
+	u16 reserved2;
+	__be16 mtu;
+	u8 pptx;		/* Tx pause policy, stored in bit 7 */
+	u8 pfctx;		/* Tx per-priority flow control mask */
+	u16 reserved3;
+	u8 pprx;		/* Rx pause policy, stored in bit 7 */
+	u8 pfcrx;		/* Rx per-priority flow control mask */
+	u16 reserved4;
+};
+
+struct mlx4_set_port_rqp_calc_context {
+	__be32 base_qpn;
+	__be32 flags;
+	u8 reserved[3];
+	u8 mac_miss;
+	u8 intra_no_vlan;
+	u8 no_vlan;		/* set to MLX4_NO_VLAN_IDX */
+	u8 intra_vlan_miss;
+	u8 vlan_miss;		/* set to MLX4_VLAN_MISS_IDX */
+	u8 reserved2[3];
+	u8 no_vlan_prio;
+	__be32 promisc;		/* promisc flag << 31 | base_qpn */
+	__be32 mcast;		/* 1 << 31 | base_qpn */
+};
+
+#define VLAN_FLTR_SIZE	128	/* 128 x 32 bits: one filter bit per VLAN index */
+struct mlx4_set_vlan_fltr_mbox {
+	__be32 entry[VLAN_FLTR_SIZE];	/* filled last-to-first by mlx4_SET_VLAN_FLTR() */
+};
+
+/* Presumably the mode values for mlx4_SET_MCAST_FLTR() - TODO confirm */
+enum {
+	MLX4_MCAST_CONFIG       = 0,
+	MLX4_MCAST_DISABLE      = 1,
+	MLX4_MCAST_ENABLE       = 2,
+};
+
+
+struct mlx4_en_stat_out_mbox {
+	/* Received frames with a length of 64 octets */
+	__be64 R64_prio_0;
+	__be64 R64_prio_1;
+	__be64 R64_prio_2;
+	__be64 R64_prio_3;
+	__be64 R64_prio_4;
+	__be64 R64_prio_5;
+	__be64 R64_prio_6;
+	__be64 R64_prio_7;
+	__be64 R64_novlan;
+	/* Received frames with a length of 127 octets */
+	__be64 R127_prio_0;
+	__be64 R127_prio_1;
+	__be64 R127_prio_2;
+	__be64 R127_prio_3;
+	__be64 R127_prio_4;
+	__be64 R127_prio_5;
+	__be64 R127_prio_6;
+	__be64 R127_prio_7;
+	__be64 R127_novlan;
+	/* Received frames with a length of 255 octets */
+	__be64 R255_prio_0;
+	__be64 R255_prio_1;
+	__be64 R255_prio_2;
+	__be64 R255_prio_3;
+	__be64 R255_prio_4;
+	__be64 R255_prio_5;
+	__be64 R255_prio_6;
+	__be64 R255_prio_7;
+	__be64 R255_novlan;
+	/* Received frames with a length of 511 octets */
+	__be64 R511_prio_0;
+	__be64 R511_prio_1;
+	__be64 R511_prio_2;
+	__be64 R511_prio_3;
+	__be64 R511_prio_4;
+	__be64 R511_prio_5;
+	__be64 R511_prio_6;
+	__be64 R511_prio_7;
+	__be64 R511_novlan;
+	/* Received frames with a length of 1023 octets */
+	__be64 R1023_prio_0;
+	__be64 R1023_prio_1;
+	__be64 R1023_prio_2;
+	__be64 R1023_prio_3;
+	__be64 R1023_prio_4;
+	__be64 R1023_prio_5;
+	__be64 R1023_prio_6;
+	__be64 R1023_prio_7;
+	__be64 R1023_novlan;
+	/* Received frames with a length of 1518 octets */
+	__be64 R1518_prio_0;
+	__be64 R1518_prio_1;
+	__be64 R1518_prio_2;
+	__be64 R1518_prio_3;
+	__be64 R1518_prio_4;
+	__be64 R1518_prio_5;
+	__be64 R1518_prio_6;
+	__be64 R1518_prio_7;
+	__be64 R1518_novlan;
+	/* Received frames with a length of 1522 octets */
+	__be64 R1522_prio_0;
+	__be64 R1522_prio_1;
+	__be64 R1522_prio_2;
+	__be64 R1522_prio_3;
+	__be64 R1522_prio_4;
+	__be64 R1522_prio_5;
+	__be64 R1522_prio_6;
+	__be64 R1522_prio_7;
+	__be64 R1522_novlan;
+	/* Received frames with a length of 1548 octets */
+	__be64 R1548_prio_0;
+	__be64 R1548_prio_1;
+	__be64 R1548_prio_2;
+	__be64 R1548_prio_3;
+	__be64 R1548_prio_4;
+	__be64 R1548_prio_5;
+	__be64 R1548_prio_6;
+	__be64 R1548_prio_7;
+	__be64 R1548_novlan;
+	/* Received frames with a length of 1548 < octets < MTU */
+	__be64 R2MTU_prio_0;
+	__be64 R2MTU_prio_1;
+	__be64 R2MTU_prio_2;
+	__be64 R2MTU_prio_3;
+	__be64 R2MTU_prio_4;
+	__be64 R2MTU_prio_5;
+	__be64 R2MTU_prio_6;
+	__be64 R2MTU_prio_7;
+	__be64 R2MTU_novlan;
+	/* Received frames with a length of MTU< octets and good CRC */
+	__be64 RGIANT_prio_0;
+	__be64 RGIANT_prio_1;
+	__be64 RGIANT_prio_2;
+	__be64 RGIANT_prio_3;
+	__be64 RGIANT_prio_4;
+	__be64 RGIANT_prio_5;
+	__be64 RGIANT_prio_6;
+	__be64 RGIANT_prio_7;
+	__be64 RGIANT_novlan;
+	/* Received broadcast frames with good CRC */
+	__be64 RBCAST_prio_0;
+	__be64 RBCAST_prio_1;
+	__be64 RBCAST_prio_2;
+	__be64 RBCAST_prio_3;
+	__be64 RBCAST_prio_4;
+	__be64 RBCAST_prio_5;
+	__be64 RBCAST_prio_6;
+	__be64 RBCAST_prio_7;
+	__be64 RBCAST_novlan;
+	/* Received multicast frames with good CRC */
+	__be64 MCAST_prio_0;
+	__be64 MCAST_prio_1;
+	__be64 MCAST_prio_2;
+	__be64 MCAST_prio_3;
+	__be64 MCAST_prio_4;
+	__be64 MCAST_prio_5;
+	__be64 MCAST_prio_6;
+	__be64 MCAST_prio_7;
+	__be64 MCAST_novlan;
+	/* Received unicast not short or GIANT frames with good CRC */
+	__be64 RTOTG_prio_0;
+	__be64 RTOTG_prio_1;
+	__be64 RTOTG_prio_2;
+	__be64 RTOTG_prio_3;
+	__be64 RTOTG_prio_4;
+	__be64 RTOTG_prio_5;
+	__be64 RTOTG_prio_6;
+	__be64 RTOTG_prio_7;
+	__be64 RTOTG_novlan;
+
+	/* Count of total octets of received frames, includes framing characters */
+	__be64 RTTLOCT_prio_0;
+	/* Count of total octets of received frames, not including framing
+	   characters */
+	__be64 RTTLOCT_NOFRM_prio_0;
+	/* Count of Total number of octets received
+	   (only for frames without errors) */
+	__be64 ROCT_prio_0;
+
+	__be64 RTTLOCT_prio_1;
+	__be64 RTTLOCT_NOFRM_prio_1;
+	__be64 ROCT_prio_1;
+
+	__be64 RTTLOCT_prio_2;
+	__be64 RTTLOCT_NOFRM_prio_2;
+	__be64 ROCT_prio_2;
+
+	__be64 RTTLOCT_prio_3;
+	__be64 RTTLOCT_NOFRM_prio_3;
+	__be64 ROCT_prio_3;
+
+	__be64 RTTLOCT_prio_4;
+	__be64 RTTLOCT_NOFRM_prio_4;
+	__be64 ROCT_prio_4;
+
+	__be64 RTTLOCT_prio_5;
+	__be64 RTTLOCT_NOFRM_prio_5;
+	__be64 ROCT_prio_5;
+
+	__be64 RTTLOCT_prio_6;
+	__be64 RTTLOCT_NOFRM_prio_6;
+	__be64 ROCT_prio_6;
+
+	__be64 RTTLOCT_prio_7;
+	__be64 RTTLOCT_NOFRM_prio_7;
+	__be64 ROCT_prio_7;
+
+	__be64 RTTLOCT_novlan;
+	__be64 RTTLOCT_NOFRM_novlan;
+	__be64 ROCT_novlan;
+
+	/* Count of Total received frames including bad frames */
+	__be64 RTOT_prio_0;
+	/* Count of total number of received frames with 802.1Q encapsulation */
+	__be64 R1Q_prio_0;
+	__be64 reserved1;
+
+	__be64 RTOT_prio_1;
+	__be64 R1Q_prio_1;
+	__be64 reserved2;
+
+	__be64 RTOT_prio_2;
+	__be64 R1Q_prio_2;
+	__be64 reserved3;
+
+	__be64 RTOT_prio_3;
+	__be64 R1Q_prio_3;
+	__be64 reserved4;
+
+	__be64 RTOT_prio_4;
+	__be64 R1Q_prio_4;
+	__be64 reserved5;
+
+	__be64 RTOT_prio_5;
+	__be64 R1Q_prio_5;
+	__be64 reserved6;
+
+	__be64 RTOT_prio_6;
+	__be64 R1Q_prio_6;
+	__be64 reserved7;
+
+	__be64 RTOT_prio_7;
+	__be64 R1Q_prio_7;
+	__be64 reserved8;
+
+	__be64 RTOT_novlan;
+	__be64 R1Q_novlan;
+	__be64 reserved9;
+
+	/* Total number of Successfully Received Control Frames */
+	__be64 RCNTL;
+	__be64 reserved10;
+	__be64 reserved11;
+	__be64 reserved12;
+	/* Count of received frames with a length/type field value between 46
+	   (42 for VLAN-tagged frames) and 1500 (also 1500 for VLAN-tagged frames),
+	   inclusive */
+	__be64 RInRangeLengthErr;
+	/* Count of received frames with length/type field between 1501 and 1535
+	   decimal, inclusive */
+	__be64 ROutRangeLengthErr;
+	/* Count of received frames that are longer than max allowed size for
+	   802.3 frames (1518/1522) */
+	__be64 RFrmTooLong;
+	/* Count frames received with PCS error */
+	__be64 PCS;
+
+	/* Transmit frames with a length of 64 octets */
+	__be64 T64_prio_0;
+	__be64 T64_prio_1;
+	__be64 T64_prio_2;
+	__be64 T64_prio_3;
+	__be64 T64_prio_4;
+	__be64 T64_prio_5;
+	__be64 T64_prio_6;
+	__be64 T64_prio_7;
+	__be64 T64_novlan;
+	__be64 T64_loopbk;
+	/* Transmit frames with a length of 65 to 127 octets. */
+	__be64 T127_prio_0;
+	__be64 T127_prio_1;
+	__be64 T127_prio_2;
+	__be64 T127_prio_3;
+	__be64 T127_prio_4;
+	__be64 T127_prio_5;
+	__be64 T127_prio_6;
+	__be64 T127_prio_7;
+	__be64 T127_novlan;
+	__be64 T127_loopbk;
+	/* Transmit frames with a length of 128 to 255 octets */
+	__be64 T255_prio_0;
+	__be64 T255_prio_1;
+	__be64 T255_prio_2;
+	__be64 T255_prio_3;
+	__be64 T255_prio_4;
+	__be64 T255_prio_5;
+	__be64 T255_prio_6;
+	__be64 T255_prio_7;
+	__be64 T255_novlan;
+	__be64 T255_loopbk;
+	/* Transmit frames with a length of 256 to 511 octets */
+	__be64 T511_prio_0;
+	__be64 T511_prio_1;
+	__be64 T511_prio_2;
+	__be64 T511_prio_3;
+	__be64 T511_prio_4;
+	__be64 T511_prio_5;
+	__be64 T511_prio_6;
+	__be64 T511_prio_7;
+	__be64 T511_novlan;
+	__be64 T511_loopbk;
+	/* Transmit frames with a length of 512 to 1023 octets */
+	__be64 T1023_prio_0;
+	__be64 T1023_prio_1;
+	__be64 T1023_prio_2;
+	__be64 T1023_prio_3;
+	__be64 T1023_prio_4;
+	__be64 T1023_prio_5;
+	__be64 T1023_prio_6;
+	__be64 T1023_prio_7;
+	__be64 T1023_novlan;
+	__be64 T1023_loopbk;
+	/* Transmit frames with a length of 1024 to 1518 octets */
+	__be64 T1518_prio_0;
+	__be64 T1518_prio_1;
+	__be64 T1518_prio_2;
+	__be64 T1518_prio_3;
+	__be64 T1518_prio_4;
+	__be64 T1518_prio_5;
+	__be64 T1518_prio_6;
+	__be64 T1518_prio_7;
+	__be64 T1518_novlan;
+	__be64 T1518_loopbk;
+	/* Counts transmit frames with a length of 1519 to 1522 bytes */
+	__be64 T1522_prio_0;
+	__be64 T1522_prio_1;
+	__be64 T1522_prio_2;
+	__be64 T1522_prio_3;
+	__be64 T1522_prio_4;
+	__be64 T1522_prio_5;
+	__be64 T1522_prio_6;
+	__be64 T1522_prio_7;
+	__be64 T1522_novlan;
+	__be64 T1522_loopbk;
+	/* Transmit frames with a length of 1523 to 1548 octets */
+	__be64 T1548_prio_0;
+	__be64 T1548_prio_1;
+	__be64 T1548_prio_2;
+	__be64 T1548_prio_3;
+	__be64 T1548_prio_4;
+	__be64 T1548_prio_5;
+	__be64 T1548_prio_6;
+	__be64 T1548_prio_7;
+	__be64 T1548_novlan;
+	__be64 T1548_loopbk;
+	/* Counts transmit frames with a length of 1549 to MTU bytes */
+	__be64 T2MTU_prio_0;
+	__be64 T2MTU_prio_1;
+	__be64 T2MTU_prio_2;
+	__be64 T2MTU_prio_3;
+	__be64 T2MTU_prio_4;
+	__be64 T2MTU_prio_5;
+	__be64 T2MTU_prio_6;
+	__be64 T2MTU_prio_7;
+	__be64 T2MTU_novlan;
+	__be64 T2MTU_loopbk;
+	/* Transmit frames with a length greater than MTU octets and a good CRC. */
+	__be64 TGIANT_prio_0;
+	__be64 TGIANT_prio_1;
+	__be64 TGIANT_prio_2;
+	__be64 TGIANT_prio_3;
+	__be64 TGIANT_prio_4;
+	__be64 TGIANT_prio_5;
+	__be64 TGIANT_prio_6;
+	__be64 TGIANT_prio_7;
+	__be64 TGIANT_novlan;
+	__be64 TGIANT_loopbk;
+	/* Transmit broadcast frames with a good CRC */
+	__be64 TBCAST_prio_0;
+	__be64 TBCAST_prio_1;
+	__be64 TBCAST_prio_2;
+	__be64 TBCAST_prio_3;
+	__be64 TBCAST_prio_4;
+	__be64 TBCAST_prio_5;
+	__be64 TBCAST_prio_6;
+	__be64 TBCAST_prio_7;
+	__be64 TBCAST_novlan;
+	__be64 TBCAST_loopbk;
+	/* Transmit multicast frames with a good CRC */
+	__be64 TMCAST_prio_0;
+	__be64 TMCAST_prio_1;
+	__be64 TMCAST_prio_2;
+	__be64 TMCAST_prio_3;
+	__be64 TMCAST_prio_4;
+	__be64 TMCAST_prio_5;
+	__be64 TMCAST_prio_6;
+	__be64 TMCAST_prio_7;
+	__be64 TMCAST_novlan;
+	__be64 TMCAST_loopbk;
+	/* Transmit good frames that are neither broadcast nor multicast */
+	__be64 TTOTG_prio_0;
+	__be64 TTOTG_prio_1;
+	__be64 TTOTG_prio_2;
+	__be64 TTOTG_prio_3;
+	__be64 TTOTG_prio_4;
+	__be64 TTOTG_prio_5;
+	__be64 TTOTG_prio_6;
+	__be64 TTOTG_prio_7;
+	__be64 TTOTG_novlan;
+	__be64 TTOTG_loopbk;
+
+	/* total octets of transmitted frames, including framing characters */
+	__be64 TTTLOCT_prio_0;
+	/* total octets of transmitted frames, not including framing characters */
+	__be64 TTTLOCT_NOFRM_prio_0;
+	/* ifOutOctets */
+	__be64 TOCT_prio_0;
+
+	__be64 TTTLOCT_prio_1;
+	__be64 TTTLOCT_NOFRM_prio_1;
+	__be64 TOCT_prio_1;
+
+	__be64 TTTLOCT_prio_2;
+	__be64 TTTLOCT_NOFRM_prio_2;
+	__be64 TOCT_prio_2;
+
+	__be64 TTTLOCT_prio_3;
+	__be64 TTTLOCT_NOFRM_prio_3;
+	__be64 TOCT_prio_3;
+
+	__be64 TTTLOCT_prio_4;
+	__be64 TTTLOCT_NOFRM_prio_4;
+	__be64 TOCT_prio_4;
+
+	__be64 TTTLOCT_prio_5;
+	__be64 TTTLOCT_NOFRM_prio_5;
+	__be64 TOCT_prio_5;
+
+	__be64 TTTLOCT_prio_6;
+	__be64 TTTLOCT_NOFRM_prio_6;
+	__be64 TOCT_prio_6;
+
+	__be64 TTTLOCT_prio_7;
+	__be64 TTTLOCT_NOFRM_prio_7;
+	__be64 TOCT_prio_7;
+
+	__be64 TTTLOCT_novlan;
+	__be64 TTTLOCT_NOFRM_novlan;
+	__be64 TOCT_novlan;
+
+	__be64 TTTLOCT_loopbk;
+	__be64 TTTLOCT_NOFRM_loopbk;
+	__be64 TOCT_loopbk;
+
+	/* Total frames transmitted with a good CRC that are not aborted  */
+	__be64 TTOT_prio_0;
+	/* Total number of frames transmitted with 802.1Q encapsulation */
+	__be64 T1Q_prio_0;
+	__be64 reserved13;
+
+	__be64 TTOT_prio_1;
+	__be64 T1Q_prio_1;
+	__be64 reserved14;
+
+	__be64 TTOT_prio_2;
+	__be64 T1Q_prio_2;
+	__be64 reserved15;
+
+	__be64 TTOT_prio_3;
+	__be64 T1Q_prio_3;
+	__be64 reserved16;
+
+	__be64 TTOT_prio_4;
+	__be64 T1Q_prio_4;
+	__be64 reserved17;
+
+	__be64 TTOT_prio_5;
+	__be64 T1Q_prio_5;
+	__be64 reserved18;
+
+	__be64 TTOT_prio_6;
+	__be64 T1Q_prio_6;
+	__be64 reserved19;
+
+	__be64 TTOT_prio_7;
+	__be64 T1Q_prio_7;
+	__be64 reserved20;
+
+	__be64 TTOT_novlan;
+	__be64 T1Q_novlan;
+	__be64 reserved21;
+
+	__be64 TTOT_loopbk;
+	__be64 T1Q_loopbk;
+	__be64 reserved22;
+
+	/* Received frames with a length greater than MTU octets and a bad CRC */
+	__be32 RJBBR;
+	/* Received frames with a bad CRC that are not runts, jabbers,
+	   or alignment errors */
+	__be32 RCRC;
+	/* Received frames with SFD with a length of less than 64 octets and a
+	   bad CRC */
+	__be32 RRUNT;
+	/* Received frames with a length less than 64 octets and a good CRC */
+	__be32 RSHORT;
+	/* Total Number of Received Packets Dropped */
+	__be32 RDROP;
+	/* Drop due to overflow  */
+	__be32 RdropOvflw;
+	/* Drop due to length (TODO confirm: comment duplicated RdropOvflw's) */
+	__be32 RdropLength;
+	/* Total of good frames. Does not include frames received with
+	   frame-too-long, FCS, or length errors */
+	__be32 RTOTFRMS;
+	/* Total dropped Xmited packets */
+	__be32 TDROP;
+};
+
+
+#endif
diff --git a/drivers/net/mlx4/en_resources.c b/drivers/net/mlx4/en_resources.c
new file mode 100644
index 00000000000..a0545209e50
--- /dev/null
+++ b/drivers/net/mlx4/en_resources.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_en.h"
+
+void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
+			     int is_tx, int rss, int qpn, int cqn, int srqn,
+			     struct mlx4_qp_context *context)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	/* Zero everything, then fill in the fields this driver sets */
+	memset(context, 0, sizeof(*context));
+	context->flags = cpu_to_be32((7 << 16) | (rss << 13));
+	context->pd = cpu_to_be32(mdev->priv_pdn);
+	context->usr_page = cpu_to_be32(mdev->priv_uar.index);
+	context->local_qpn = cpu_to_be32(qpn);
+	context->mtu_msgmax = 0xff;
+	context->rq_size_stride = 0;
+	/* TX: log2(size) << 3 combined with log2(stride) - 4; otherwise 1 */
+	context->sq_size_stride = is_tx ?
+		(ilog2(size) << 3) | (ilog2(stride) - 4) : 1;
+	context->pri_path.ackto = 1 & 0x07;
+	context->pri_path.sched_queue = 0x83 | ((priv->port - 1) << 6);
+	context->pri_path.counter_index = 0xff;
+	/* The same CQ number serves both the send and the receive side */
+	context->cqn_send = context->cqn_recv = cpu_to_be32(cqn);
+	context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
+	if (!rss)
+		context->srqn = cpu_to_be32(MLX4_EN_USE_SRQ | srqn);
+}
+
+
+int mlx4_en_map_buffer(struct mlx4_buf *buf)
+{
+	struct page **page_vec;
+	int idx;
+
+	/* Nothing to map on 64-bit builds or for a single-chunk buffer */
+	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+		return 0;
+
+	page_vec = kmalloc(buf->nbufs * sizeof(*page_vec), GFP_KERNEL);
+	if (page_vec == NULL)
+		return -ENOMEM;
+
+	/* Collect the page of every chunk and vmap() them as one range */
+	for (idx = 0; idx < buf->nbufs; idx++)
+		page_vec[idx] = virt_to_page(buf->page_list[idx].buf);
+	buf->direct.buf = vmap(page_vec, buf->nbufs, VM_MAP, PAGE_KERNEL);
+
+	/* The temporary page array is freed whether or not vmap() worked */
+	kfree(page_vec);
+	return buf->direct.buf ? 0 : -ENOMEM;
+}
+
+void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
+{
+	/* vmap() is only ever done for multi-chunk buffers on builds where
+	 * BITS_PER_LONG is not 64, so only those need the vunmap() */
+	if (BITS_PER_LONG != 64 && buf->nbufs != 1)
+		vunmap(buf->direct.buf);
+}
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
new file mode 100644
index 00000000000..6232227f56c
--- /dev/null
+++ b/drivers/net/mlx4/en_rx.c
@@ -0,0 +1,1080 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+
+#include "mlx4_en.h"
+
+/* Return the address of WQE n: (n << wqe_shift) bytes into the ring buffer */
+static void *get_wqe(struct mlx4_en_rx_ring *ring, int n)
+{
+	return ring->buf + (n << ring->srq.wqe_shift);
+}
+
+static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
+{
+	/* No-op: this handler takes no action for any SRQ event type */
+}
+
+static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr,
+				   void **ip_hdr, void **tcpudp_hdr,
+				   u64 *hdr_flags, void *priv)
+{
+	/* Fixed layout: Ethernet header, then struct iphdr, then TCP header */
+	*mac_hdr = page_address(frags->page) + frags->page_offset;
+	*ip_hdr = (char *)*mac_hdr + ETH_HLEN;
+	*tcpudp_hdr = (char *)*ip_hdr + sizeof(struct iphdr);
+	*hdr_flags = LRO_IPV4 | LRO_TCP;
+	return 0;
+}
+
+static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
+			      struct mlx4_en_rx_desc *rx_desc,
+			      struct skb_frag_struct *skb_frags,
+			      struct mlx4_en_rx_alloc *ring_alloc,
+			      int i)
+{
+	struct mlx4_en_frag_info *info = &priv->frag_info[i];
+	struct mlx4_en_rx_alloc *alloc = &ring_alloc[i];
+	struct skb_frag_struct *frag = &skb_frags[i];
+	int last_chunk = alloc->offset == info->last_offset;
+	struct page *next = NULL;
+	dma_addr_t dma;
+
+	/* Last chunk of the page: get a replacement before changing any state */
+	if (last_chunk) {
+		next = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
+		if (!next)
+			return -ENOMEM;
+	}
+	/* The frag always receives the allocator's current page and offset */
+	frag->page = alloc->page;
+	frag->page_offset = alloc->offset;
+	if (last_chunk) {
+		alloc->page = next;
+		alloc->offset = info->frag_align;
+	} else {
+		/* Page stays shared with the allocator: take another reference */
+		get_page(frag->page);
+		alloc->offset += info->frag_stride;
+	}
+	dma = pci_map_single(priv->mdev->pdev, page_address(frag->page) +
+			     frag->page_offset, info->frag_size,
+			     PCI_DMA_FROMDEVICE);
+	rx_desc->data[i].addr = cpu_to_be64(dma);
+	return 0;
+}
+
+static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
+				  struct mlx4_en_rx_ring *ring)
+{
+	struct mlx4_en_rx_alloc *alloc = ring->page_alloc;
+	int n;
+
+	for (n = 0; n < priv->num_frags; n++) {
+		alloc[n].page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
+					    MLX4_EN_ALLOC_ORDER);
+		if (!alloc[n].page)
+			goto unwind;
+
+		alloc[n].offset = priv->frag_info[n].frag_align;
+		mlx4_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
+			 n, alloc[n].page);
+	}
+	return 0;
+
+unwind:
+	/* Release the pages of the allocators set up before the failure */
+	for (n--; n >= 0; n--) {
+		put_page(alloc[n].page);
+		alloc[n].page = NULL;
+	}
+	return -ENOMEM;
+}
+
+static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
+				      struct mlx4_en_rx_ring *ring)
+{
+	struct mlx4_en_rx_alloc *page_alloc = ring->page_alloc;
+	int i;
+
+	for (i = 0; i < priv->num_frags; i++, page_alloc++) {
+		if (!page_alloc->page)	/* init failed or already released */
+			continue;
+		mlx4_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
+			 i, page_count(page_alloc->page));
+		put_page(page_alloc->page);
+		page_alloc->page = NULL;
+	}
+}
+
+
+static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
+				 struct mlx4_en_rx_ring *ring, int index)
+{
+	struct mlx4_en_rx_desc *desc = ring->buf + ring->stride * index;
+	struct skb_frag_struct *frags = ring->rx_info +
+					(index << priv->log_rx_info);
+	int max_frags;
+	int n;
+
+	/* Chain this descriptor to the next slot, wrapping via size_mask */
+	desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask);
+
+	/* Entries in use: record the fragment size and the memory key */
+	for (n = 0; n < priv->num_frags; n++) {
+		struct mlx4_en_frag_info *info = &priv->frag_info[n];
+
+		frags[n].size = info->frag_size;
+		desc->data[n].byte_count = cpu_to_be32(info->frag_size);
+		desc->data[n].lkey = cpu_to_be32(priv->mdev->mr.key);
+	}
+
+	/* Entries the stride has room for but that go unused are padded with
+	 * a null address/size and the special padding memory key */
+	max_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
+	for (n = priv->num_frags; n < max_frags; n++) {
+		desc->data[n].byte_count = 0;
+		desc->data[n].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+		desc->data[n].addr = 0;
+	}
+}
+
+static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
+				   struct mlx4_en_rx_ring *ring, int index)
+{
+	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
+	struct skb_frag_struct *skb_frags = ring->rx_info +
+					    (index << priv->log_rx_info);
+	int i;
+
+	for (i = 0; i < priv->num_frags; i++)
+		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
+			goto err;
+	return 0;
+err:
+	while (i--) {	/* undo: unmap the DMA buffer, then drop the page ref */
+		pci_unmap_single(priv->mdev->pdev, be64_to_cpu(rx_desc->data[i].addr),
+				 priv->frag_info[i].frag_size, PCI_DMA_FROMDEVICE);
+		put_page(skb_frags[i].page);
+	}
+	return -ENOMEM;
+}
+
+static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+{
+	ring->wqres.db.db[0] = cpu_to_be32(ring->prod & 0xffff); /* low 16 bits */
+}
+
+static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_rx_ring *ring;
+	int pass, r;
+
+	/* One buffer per ring on every pass, so all rings grow together */
+	for (pass = 0; pass < priv->prof->rx_ring_size; pass++) {
+		for (r = 0; r < priv->rx_ring_num; r++) {
+			ring = &priv->rx_ring[r];
+
+			if (!mlx4_en_prepare_rx_desc(priv, ring,
+						     ring->actual_size)) {
+				ring->actual_size++;
+				ring->prod++;
+				continue;
+			}
+			/* Allocation failed: fatal only below the minimum size */
+			if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
+				mlx4_err(mdev, "Failed to allocate "
+					       "enough rx buffers\n");
+				return -ENOMEM;
+			}
+			/* Otherwise settle for what was allocated so far */
+			if (netif_msg_rx_err(priv))
+				mlx4_warn(mdev, "Only %d buffers allocated\n",
+					  ring->actual_size);
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static int mlx4_en_fill_rx_buf(struct net_device *dev,
+			       struct mlx4_en_rx_ring *ring)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int filled;
+
+	/* Post descriptors while fewer than actual_size are outstanding
+	 * (prod - cons); stop at the first allocation failure */
+	for (filled = 0;
+	     (u32) (ring->prod - ring->cons) < ring->actual_size;
+	     filled++, ring->prod++) {
+		if (!mlx4_en_prepare_rx_desc(priv, ring,
+					     ring->prod & ring->size_mask))
+			continue;
+
+		if (netif_msg_rx_err(priv))
+			mlx4_warn(priv->mdev,
+				  "Failed preparing rx descriptor\n");
+		priv->port_stats.rx_alloc_failed++;
+		break;
+	}
+	if ((u32) (ring->prod - ring->cons) == ring->size)
+		ring->full = 1;
+	return filled;
+}
+
+static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
+				struct mlx4_en_rx_ring *ring)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int nr;
+
+	mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
+			ring->cons, ring->prod);
+
+	/* Sanity: outstanding descriptors can never exceed the ring size */
+	BUG_ON((u32) (ring->prod - ring->cons) > ring->size);
+
+	/* Unmap and free every descriptor from cons up to prod */
+	for (; ring->cons != ring->prod; ++ring->cons) {
+		int index = ring->cons & ring->size_mask;
+		struct mlx4_en_rx_desc *rx_desc =
+			ring->buf + (index << ring->log_stride);
+		struct skb_frag_struct *skb_frags =
+			ring->rx_info + (index << priv->log_rx_info);
+
+		mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index);
+		for (nr = 0; nr < priv->num_frags; nr++) {
+			dma_addr_t dma;
+
+			mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
+			dma = be64_to_cpu(rx_desc->data[nr].addr);
+			mlx4_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
+			pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+					 PCI_DMA_FROMDEVICE);
+			put_page(skb_frags[nr].page);
+		}
+	}
+}
+
+
+void mlx4_en_rx_refill(struct work_struct *work)
+{
+	struct delayed_work *dwork = container_of(work, struct delayed_work, work);
+	struct mlx4_en_priv *priv = container_of(dwork, struct mlx4_en_priv,
+						 refill_task);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_rx_ring *ring;
+	int retry = 0;
+	int i;
+
+	mutex_lock(&mdev->state_lock);
+	if (mdev->device_up && priv->port_up) {
+		/* We only get here if there are no receive buffers, so filling
+		 * them cannot race with Rx interrupts */
+		for (i = 0; i < priv->rx_ring_num; i++) {
+			ring = &priv->rx_ring[i];
+			if (!ring->need_refill)
+				continue;
+			if (!mlx4_en_fill_rx_buf(priv->dev, ring)) {
+				/* Nothing could be posted: try again later */
+				retry = 1;
+				continue;
+			}
+			ring->need_refill = 0;
+			mlx4_en_update_rx_prod_db(ring);
+		}
+		/* Queue this task again with a delay of HZ jiffies */
+		if (retry)
+			queue_delayed_work(mdev->workqueue,
+					   &priv->refill_task, HZ);
+	}
+	mutex_unlock(&mdev->state_lock);
+}
+
+
+/* Allocate the rx_info array, HW queue buffer and LRO state of one RX ring */
+int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
+			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int err, tmp;
+
+	/* Sanity check SRQ size before proceeding */
+	if (size >= mdev->dev->caps.max_srq_wqes)
+		return -EINVAL;
+
+	ring->prod = 0;
+	ring->cons = 0;
+	ring->size = size;
+	ring->size_mask = size - 1;
+	ring->stride = stride;
+	ring->log_stride = ffs(ring->stride) - 1;
+	ring->buf_size = ring->size * ring->stride;
+
+	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
+					sizeof(struct skb_frag_struct));
+	ring->rx_info = vmalloc(tmp);
+	if (!ring->rx_info) {
+		mlx4_err(mdev, "Failed allocating rx_info ring\n");
+		return -ENOMEM;
+	}
+	mlx4_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
+		 ring->rx_info, tmp);
+
+	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
+				 ring->buf_size, 2 * PAGE_SIZE);
+	if (err)
+		goto err_ring;
+
+	err = mlx4_en_map_buffer(&ring->wqres.buf);
+	if (err) {
+		mlx4_err(mdev, "Failed to map RX buffer\n");
+		goto err_hwq;
+	}
+	ring->buf = ring->wqres.buf.direct.buf;
+
+	/* Configure lro mngr */
+	memset(&ring->lro, 0, sizeof(struct net_lro_mgr));
+	ring->lro.dev = priv->dev;
+	ring->lro.features = LRO_F_NAPI;
+	ring->lro.frag_align_pad = NET_IP_ALIGN;
+	ring->lro.ip_summed = CHECKSUM_UNNECESSARY;
+	ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+	ring->lro.max_desc = mdev->profile.num_lro;
+	ring->lro.max_aggr = MAX_SKB_FRAGS;
+	ring->lro.lro_arr = kzalloc(mdev->profile.num_lro *
+				    sizeof(struct net_lro_desc), GFP_KERNEL);
+	if (!ring->lro.lro_arr) {
+		mlx4_err(mdev, "Failed to allocate lro array\n");
+		err = -ENOMEM;	/* err is still 0 from the successful map */
+		goto err_map;
+	}
+	ring->lro.get_frag_header = mlx4_en_get_frag_header;
+
+	return 0;
+
+err_map:
+	mlx4_en_unmap_buffer(&ring->wqres.buf);
+err_hwq:
+	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+err_ring:
+	vfree(ring->rx_info);
+	ring->rx_info = NULL;
+	return err;
+}
+
+/* Reset every RX ring, set up its page allocators and buffers, then create its
+ * SRQ.  Returns 0, or a negative errno after undoing whatever was set up. */
+int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_wqe_srq_next_seg *next;
+	struct mlx4_en_rx_ring *ring;
+	int i;
+	int ring_ind;
+	int err;
+	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+					DS_SIZE * priv->num_frags);
+	int max_gs = (stride - sizeof(struct mlx4_wqe_srq_next_seg)) / DS_SIZE;
+
+	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+		ring = &priv->rx_ring[ring_ind];
+
+		ring->prod = 0;
+		ring->cons = 0;
+		ring->actual_size = 0;
+		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;
+
+		ring->stride = stride;
+		ring->log_stride = ffs(ring->stride) - 1;
+		ring->buf_size = ring->size * ring->stride;
+
+		memset(ring->buf, 0, ring->buf_size);
+		mlx4_en_update_rx_prod_db(ring);
+
+		/* Initialize all descriptors */
+		for (i = 0; i < ring->size; i++)
+			mlx4_en_init_rx_desc(priv, ring, i);
+
+		/* Initialize page allocators */
+		err = mlx4_en_init_allocator(priv, ring);
+		if (err) {
+			mlx4_err(mdev, "Failed initializing ring allocator\n");
+			goto err_allocator;
+		}
+
+		/* The ring starts out not full; buffers are posted below */
+		ring->full = 0;
+	}
+	err = mlx4_en_fill_rx_buffers(priv);
+	if (err)
+		goto err_buffers;
+
+	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+		ring = &priv->rx_ring[ring_ind];
+
+		mlx4_en_update_rx_prod_db(ring);
+
+		/* Configure SRQ representing the ring */
+		ring->srq.max    = ring->size;
+		ring->srq.max_gs = max_gs;
+		ring->srq.wqe_shift = ilog2(ring->stride);
+
+		for (i = 0; i < ring->srq.max; ++i) {
+			next = get_wqe(ring, i);
+			next->next_wqe_index =
+				cpu_to_be16((i + 1) & (ring->srq.max - 1));
+		}
+
+		err = mlx4_srq_alloc(mdev->dev, mdev->priv_pdn, &ring->wqres.mtt,
+				     ring->wqres.db.dma, &ring->srq);
+		if (err) {
+			mlx4_err(mdev, "Failed to allocate srq\n");
+			goto err_srq;
+		}
+		ring->srq.event = mlx4_en_srq_event;
+	}
+
+	return 0;
+
+err_srq:
+	/* Ring ring_ind never got its SRQ; only the earlier rings own one */
+	while (--ring_ind >= 0)
+		mlx4_srq_free(mdev->dev, &priv->rx_ring[ring_ind].srq);
+
+err_buffers:
+	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
+		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);
+
+	ring_ind = priv->rx_ring_num;
+err_allocator:
+	/* ring_ind is one past the last ring whose allocators are live */
+	while (--ring_ind >= 0)
+		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
+	return err;
+}
+
+void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
+			     struct mlx4_en_rx_ring *ring)
+{
+	/* Free the LRO array, then the mapping and the HW queue resources */
+	kfree(ring->lro.lro_arr);
+	mlx4_en_unmap_buffer(&ring->wqres.buf);
+	mlx4_free_hwq_res(priv->mdev->dev, &ring->wqres, ring->buf_size);
+	/* Free the rx_info array last and forget the stale pointer */
+	vfree(ring->rx_info);
+	ring->rx_info = NULL;
+}
+
+void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
+				struct mlx4_en_rx_ring *ring)
+{
+	/* Free the SRQ first, then the posted buffers, then the allocators */
+	mlx4_srq_free(priv->mdev->dev, &ring->srq);
+
+	mlx4_en_free_rx_buf(priv, ring);
+	mlx4_en_destroy_allocator(priv, ring);
+}
+
+
+/* Move the used fragments of a completed descriptor to skb_frags_rx and refill
+ * the descriptor.  Returns the number moved (0 if a refill allocation failed). */
+static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
+				    struct mlx4_en_rx_desc *rx_desc,
+				    struct skb_frag_struct *skb_frags,
+				    struct skb_frag_struct *skb_frags_rx,
+				    struct mlx4_en_rx_alloc *page_alloc,
+				    int length)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_frag_info *frag_info;
+	int nr;
+	dma_addr_t dma;
+
+	/* Collect used fragments while replacing them in the HW descriptors */
+	for (nr = 0; nr < priv->num_frags; nr++) {
+		frag_info = &priv->frag_info[nr];
+		if (length <= frag_info->frag_prefix_size)
+			break;
+
+		/* Save page reference in skb */
+		skb_frags_rx[nr].page = skb_frags[nr].page;
+		skb_frags_rx[nr].size = skb_frags[nr].size;
+		skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset;
+		dma = be64_to_cpu(rx_desc->data[nr].addr);
+
+		/* Allocate a replacement page */
+		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
+			goto fail;
+
+		/* Unmap buffer */
+		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+				 PCI_DMA_FROMDEVICE);
+	}
+	/* Trim the last fragment, if any, to the bytes actually received */
+	if (nr > 0)
+		skb_frags_rx[nr - 1].size = length -
+			priv->frag_info[nr - 1].frag_prefix_size;
+	return nr;
+
+fail:
+	/* Drop all accumulated fragments (which have already been replaced in
+	 * the descriptor) of this packet; remaining fragments are reused... */
+	while (nr-- > 0)
+		put_page(skb_frags_rx[nr].page);
+	return 0;
+}
+
+
+static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
+				      struct mlx4_en_rx_desc *rx_desc,
+				      struct skb_frag_struct *skb_frags,
+				      struct mlx4_en_rx_alloc *page_alloc,
+				      unsigned int length)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct sk_buff *skb;
+	void *va;
+	int used_frags;
+	dma_addr_t dma;
+
+	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
+	if (!skb) {
+		mlx4_dbg(RX_ERR, priv, "Failed allocating skb\n");
+		return NULL;
+	}
+	skb->dev = priv->dev;
+	skb_reserve(skb, NET_IP_ALIGN);
+	skb->len = length;
+	skb->truesize = length + sizeof(struct sk_buff);
+
+	/* Get pointer to first fragment so we could copy the headers into the
+	 * (linear part of the) skb */
+	va = page_address(skb_frags[0].page) + skb_frags[0].page_offset;
+
+	if (length <= SMALL_PACKET_SIZE) {
+		/* We are copying all relevant data to the skb - temporarily
+		 * synch buffers for the copy */
+		dma = be64_to_cpu(rx_desc->data[0].addr);
+		dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0,
+					      length, DMA_FROM_DEVICE);
+		skb_copy_to_linear_data(skb, va, length);
+		dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0,
+						 length, DMA_FROM_DEVICE);
+		skb->tail += length;
+	} else {
+		/* Move relevant fragments to skb; 0 means they were dropped */
+		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
+						      skb_shinfo(skb)->frags,
+						      page_alloc, length);
+		if (unlikely(!used_frags)) {
+			kfree_skb(skb);
+			return NULL;
+		}
+		skb_shinfo(skb)->nr_frags = used_frags;
+
+		/* Copy headers into the skb linear buffer */
+		memcpy(skb->data, va, HEADER_COPY_SIZE);
+		skb->tail += HEADER_COPY_SIZE;
+		/* Skip the copied headers in the first fragment */
+		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;
+		skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE;
+		skb->data_len = length - HEADER_COPY_SIZE;
+	}
+	return skb;
+}
+
+static void mlx4_en_copy_desc(struct mlx4_en_priv *priv,
+			      struct mlx4_en_rx_ring *ring,
+			      int from, int to, int num)
+{
+	struct skb_frag_struct *src_frags, *dst_frags;
+	struct mlx4_en_rx_desc *src_desc, *dst_desc;
+	int src, dst;
+	int nr;
+
+	/* Copy 'num' consecutive slots; indices wrap through size_mask */
+	for (; num > 0; num--, from++, to++) {
+		src = from & ring->size_mask;
+		dst = to & ring->size_mask;
+		src_frags = ring->rx_info + (src << priv->log_rx_info);
+		dst_frags = ring->rx_info + (dst << priv->log_rx_info);
+		src_desc = ring->buf + (src << ring->log_stride);
+		dst_desc = ring->buf + (dst << ring->log_stride);
+
+		/* Only page, page offset and DMA address are copied */
+		for (nr = 0; nr < priv->num_frags; nr++) {
+			dst_frags[nr].page = src_frags[nr].page;
+			dst_frags[nr].page_offset = src_frags[nr].page_offset;
+			dst_desc->data[nr].addr = src_desc->data[nr].addr;
+		}
+	}
+}
+
+
+/*
+ * Process completed Rx CQEs, handling at most 'budget' of them.
+ *
+ * Every consumed CQE is counted, including completions that are skipped
+ * (error CQEs, frames with bad FCS, failed LRO/skb construction).  Frames
+ * with a verified checksum that qualify for LRO are passed to the LRO
+ * engine as page fragments; all other good frames are wrapped in an skb
+ * and pushed up the stack, through the VLAN acceleration path when the CQE
+ * reports a VLAN tag and a VLAN group is registered.
+ *
+ * Returns the number of CQEs consumed, or 0 if the port is down.
+ */
+int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_cqe *cqe;
+	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
+	struct skb_frag_struct *skb_frags;
+	struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS];
+	struct mlx4_en_rx_desc *rx_desc;
+	struct sk_buff *skb;
+	int index;
+	int nr;
+	unsigned int length;
+	int polled = 0;
+	int ip_summed;
+
+	if (!priv->port_up)
+		return 0;
+
+	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
+	 * descriptor offset can be deduced from the CQE index instead of
+	 * reading 'cqe->index' */
+	index = cq->mcq.cons_index & ring->size_mask;
+	cqe = &cq->buf[index];
+
+	/* Process all completed CQEs: a CQE is ours while its ownership bit
+	 * matches the wrap parity of the consumer index */
+	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
+		    cq->mcq.cons_index & cq->size)) {
+
+		skb_frags = ring->rx_info + (index << priv->log_rx_info);
+		rx_desc = ring->buf + (index << ring->log_stride);
+
+		/*
+		 * make sure we read the CQE after we read the ownership bit
+		 */
+		rmb();
+
+		/* Drop packet on bad receive or bad checksum */
+		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+						MLX4_CQE_OPCODE_ERROR)) {
+			mlx4_err(mdev, "CQE completed in error - vendor "
+				  "syndrom:%d syndrom:%d\n",
+				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
+				  ((struct mlx4_err_cqe *) cqe)->syndrome);
+			goto next;
+		}
+		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
+			mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
+			goto next;
+		}
+
+		/*
+		 * Packet is OK - process it.
+		 */
+		length = be32_to_cpu(cqe->byte_cnt);
+		ring->bytes += length;
+		ring->packets++;
+
+		if (likely(priv->rx_csum)) {
+			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+			    (cqe->checksum == cpu_to_be16(0xffff))) {
+				priv->port_stats.rx_chksum_good++;
+				/* This packet is eligible for LRO if it is:
+				 * - DIX Ethernet (type interpretation)
+				 * - TCP/IP (v4)
+				 * - without IP options
+				 * - not an IP fragment */
+				if (mlx4_en_can_lro(cqe->status) &&
+				    dev->features & NETIF_F_LRO) {
+
+					/* Zero fragments means the frame could
+					 * not be completed - skip it */
+					nr = mlx4_en_complete_rx_desc(
+						priv, rx_desc,
+						skb_frags, lro_frags,
+						ring->page_alloc, length);
+					if (!nr)
+						goto next;
+
+					if (priv->vlgrp && (cqe->vlan_my_qpn &
+							    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) {
+						lro_vlan_hwaccel_receive_frags(
+						       &ring->lro, lro_frags,
+						       length, length,
+						       priv->vlgrp,
+						       be16_to_cpu(cqe->sl_vid),
+						       NULL, 0);
+					} else
+						lro_receive_frags(&ring->lro,
+								  lro_frags,
+								  length,
+								  length,
+								  NULL, 0);
+
+					goto next;
+				}
+
+				/* LRO not possible, complete processing here */
+				ip_summed = CHECKSUM_UNNECESSARY;
+				INC_PERF_COUNTER(priv->pstats.lro_misses);
+			} else {
+				ip_summed = CHECKSUM_NONE;
+				priv->port_stats.rx_chksum_none++;
+			}
+		} else {
+			ip_summed = CHECKSUM_NONE;
+			priv->port_stats.rx_chksum_none++;
+		}
+
+		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
+				     ring->page_alloc, length);
+		if (!skb) {
+			priv->stats.rx_dropped++;
+			goto next;
+		}
+
+		skb->ip_summed = ip_summed;
+		skb->protocol = eth_type_trans(skb, dev);
+
+		/* Push it up the stack */
+		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
+				    MLX4_CQE_VLAN_PRESENT_MASK)) {
+			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
+						be16_to_cpu(cqe->sl_vid));
+		} else
+			netif_receive_skb(skb);
+
+		dev->last_rx = jiffies;
+
+next:
+		/* Advance to the next CQE; skipped completions are consumed
+		 * and counted against the budget as well */
+		++cq->mcq.cons_index;
+		index = (cq->mcq.cons_index) & ring->size_mask;
+		cqe = &cq->buf[index];
+		if (++polled == budget) {
+			/* We are here because we reached the NAPI budget -
+			 * flush only pending LRO sessions */
+			lro_flush_all(&ring->lro);
+			goto out;
+		}
+	}
+
+	/* If CQ is empty flush all LRO sessions unconditionally */
+	lro_flush_all(&ring->lro);
+
+out:
+	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
+	mlx4_cq_set_ci(&cq->mcq);
+	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+	ring->cons = cq->mcq.cons_index;
+	ring->prod += polled; /* Polled descriptors were reallocated in place */
+	if (unlikely(!ring->full)) {
+		/* Ring is not fully populated: carry the descriptors just
+		 * processed over to the producer side, then let
+		 * mlx4_en_fill_rx_buf() (not visible here) top the ring up */
+		mlx4_en_copy_desc(priv, ring, ring->cons - polled,
+				  ring->prod - polled, polled);
+		mlx4_en_fill_rx_buf(dev, ring);
+	}
+	mlx4_en_update_rx_prod_db(ring);
+	return polled;
+}
+
+
+/*
+ * Rx completion event handler: hand the CQ over to NAPI while the port is
+ * up, otherwise just re-arm the CQ.
+ */
+void mlx4_en_rx_irq(struct mlx4_cq *mcq)
+{
+	struct mlx4_en_cq *rx_cq = container_of(mcq, struct mlx4_en_cq, mcq);
+	struct mlx4_en_priv *priv = netdev_priv(rx_cq->dev);
+
+	if (!priv->port_up) {
+		mlx4_en_arm_cq(priv, rx_cq);
+		return;
+	}
+
+	netif_rx_schedule(rx_cq->dev, &rx_cq->napi);
+}
+
+/*
+ * NAPI poll callback for an Rx CQ.
+ * Returns the number of completions processed in this round.
+ */
+int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
+{
+	struct mlx4_en_cq *rx_cq = container_of(napi, struct mlx4_en_cq, napi);
+	struct net_device *netdev = rx_cq->dev;
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	int work_done = mlx4_en_process_rx_cq(netdev, rx_cq, budget);
+
+	if (work_done == budget) {
+		/* Whole quota consumed - more work is probably pending */
+		INC_PERF_COUNTER(priv->pstats.napi_quota);
+		return work_done;
+	}
+
+	/* Drained for now: leave polling mode and re-arm the CQ */
+	netif_rx_complete(netdev, napi);
+	mlx4_en_arm_cq(priv, rx_cq);
+	return work_done;
+}
+
+
+/*
+ * Compute the last offset inside an allocation of MLX4_EN_ALLOC_SIZE bytes
+ * that still accommodates a full fragment (assuming the fragment size is
+ * stride - align).
+ */
+static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
+{
+	/* Bytes left over after packing whole strides into the allocation */
+	u16 leftover = MLX4_EN_ALLOC_SIZE % stride;
+	u16 last = MLX4_EN_ALLOC_SIZE - stride - leftover + align;
+
+	mlx4_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
+			    "res:%d offset:%d\n", stride, align, leftover, last);
+
+	return last;
+}
+
+
+/*
+ * Maximum size of each successive Rx fragment: entry i is the size limit
+ * mlx4_en_calc_rx_buf() applies to fragment i of a received frame.
+ */
+static int frag_sizes[] = {
+	FRAG_SZ0,
+	FRAG_SZ1,
+	FRAG_SZ2,
+	FRAG_SZ3
+};
+
+/*
+ * Derive the Rx scatter layout for the device's current MTU.
+ *
+ * The effective MTU (dev->mtu plus ETH_HLEN, VLAN_HLEN and
+ * ETH_LLC_SNAP_SIZE) is split across consecutive fragments: fragment i takes
+ * up to frag_sizes[i] bytes and the last one takes whatever remains.  For
+ * each fragment the size, prefix (bytes covered by earlier fragments),
+ * alignment, stride and last usable allocation offset are stored in
+ * priv->frag_info[]; priv->num_frags, priv->rx_skb_size and
+ * priv->log_rx_info are updated to match.
+ *
+ * NOTE(review): the loop does not bound 'i'; it relies on the effective MTU
+ * fitting within the frag_sizes[] entries - confirm against the MTU limits
+ * enforced by the callers.
+ */
+void mlx4_en_calc_rx_buf(struct net_device *dev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
+	int buf_size = 0;
+	int i = 0;
+
+	while (buf_size < eff_mtu) {
+		/* Full-size fragment, or just the remainder for the last one */
+		priv->frag_info[i].frag_size =
+			(eff_mtu > buf_size + frag_sizes[i]) ?
+				frag_sizes[i] : eff_mtu - buf_size;
+		priv->frag_info[i].frag_prefix_size = buf_size;
+		if (!i)	{
+			/* Only the first fragment is offset by NET_IP_ALIGN */
+			priv->frag_info[i].frag_align = NET_IP_ALIGN;
+			priv->frag_info[i].frag_stride =
+				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
+		} else {
+			priv->frag_info[i].frag_align = 0;
+			priv->frag_info[i].frag_stride =
+				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
+		}
+		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
+						priv, priv->frag_info[i].frag_stride,
+						priv->frag_info[i].frag_align);
+		buf_size += priv->frag_info[i].frag_size;
+		i++;
+	}
+
+	priv->num_frags = i;
+	priv->rx_skb_size = eff_mtu;
+	/* Per-descriptor rx_info entries are indexed with a shift, so their
+	 * size is rounded up to a power of two */
+	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));
+
+	mlx4_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
+		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
+	for (i = 0; i < priv->num_frags; i++) {
+		mlx4_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
+				"stride:%d last_offset:%d\n", i,
+				priv->frag_info[i].frag_size,
+				priv->frag_info[i].frag_prefix_size,
+				priv->frag_info[i].frag_align,
+				priv->frag_info[i].frag_stride,
+				priv->frag_info[i].last_offset);
+	}
+}
+
+/* RSS related functions */
+
+/*
+ * Build the default RSS table: its size is num_entries rounded up to a power
+ * of two, and entries are assigned to the rx rings in round-robin order.
+ */
+void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv,
+				 struct mlx4_en_rss_map *rss_map,
+				 int num_entries, int num_rings)
+{
+	int entry = 0;
+
+	rss_map->size = roundup_pow_of_two(num_entries);
+	mlx4_dbg(DRV, priv, "Setting default RSS map of %d entires\n",
+		 rss_map->size);
+
+	while (entry < rss_map->size) {
+		rss_map->map[entry] = entry % num_rings;
+		mlx4_dbg(DRV, priv, "Entry %d ---> ring %d\n",
+			 entry, rss_map->map[entry]);
+		entry++;
+	}
+}
+
+/* QP event callback installed on the RSS QPs; events are deliberately ignored */
+static void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
+{
+}
+
+
+/*
+ * Allocate one RSS rx QP and bring it to the ready state.
+ *
+ * @qpn, @srqn, @cqn: QP number to allocate and the SRQ/CQ numbers placed in
+ *                    its context
+ * @state:            QP state tracker handed to mlx4_qp_to_ready()
+ * @qp:               QP object to initialise
+ *
+ * Returns 0 on success, -ENOMEM if the temporary context cannot be
+ * allocated, or the error reported by the failing mlx4 helper.  If the QP
+ * cannot be made ready it is removed and freed again.  The temporary QP
+ * context is released on every path.
+ */
+static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv,
+				 int qpn, int srqn, int cqn,
+				 enum mlx4_qp_state *state,
+				 struct mlx4_qp *qp)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_qp_context *context;
+	int err = 0;
+
+	context = kmalloc(sizeof *context , GFP_KERNEL);
+	if (!context) {
+		mlx4_err(mdev, "Failed to allocate qp context\n");
+		return -ENOMEM;
+	}
+
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+	if (err) {
+		mlx4_err(mdev, "Failed to allocate qp #%d\n", qpn);
+		/* Nothing but the context to undo at this point */
+		goto out;
+	}
+	qp->event = mlx4_en_sqp_event;
+
+	memset(context, 0, sizeof *context);
+	mlx4_en_fill_qp_context(priv, 0, 0, 0, 0, qpn, cqn, srqn, context);
+
+	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, context, qp, state);
+	if (err) {
+		mlx4_qp_remove(mdev->dev, qp);
+		mlx4_qp_free(mdev->dev, qp);
+	}
+out:
+	kfree(context);
+	return err;
+}
+
+/*
+ * Allocate rx qp's and configure them according to rss map.
+ *
+ * Reserves rss_map->size QP numbers, sets up one QP per RSS table entry
+ * (each bound to the CQ and SRQ of the rx ring the entry maps to), then
+ * reserves and configures a separate RSS indirection QP whose context
+ * carries the RSS parameters.
+ *
+ * Returns 0 on success.  On failure everything set up so far is torn down
+ * in reverse order and the error code is returned.
+ */
+int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
+	struct mlx4_qp_context context;
+	struct mlx4_en_rss_context *rss_context;
+	void *ptr;
+	int rss_xor = mdev->profile.rss_xor;
+	u8 rss_mask = mdev->profile.rss_mask;
+	int i, srqn, qpn, cqn;
+	int err = 0;
+	int good_qps = 0;	/* number of rx QPs that must be undone on error */
+
+	mlx4_dbg(DRV, priv, "Configuring rss steering for port %u\n", priv->port);
+	err = mlx4_qp_reserve_range(mdev->dev, rss_map->size,
+				    rss_map->size, &rss_map->base_qpn);
+	if (err) {
+		mlx4_err(mdev, "Failed reserving %d qps for port %u\n",
+			 rss_map->size, priv->port);
+		return err;
+	}
+
+	for (i = 0; i < rss_map->size; i++) {
+		cqn = priv->rx_ring[rss_map->map[i]].cqn;
+		srqn = priv->rx_ring[rss_map->map[i]].srq.srqn;
+		qpn = rss_map->base_qpn + i;
+		err = mlx4_en_config_rss_qp(priv, qpn, srqn, cqn,
+					    &rss_map->state[i],
+					    &rss_map->qps[i]);
+		if (err)
+			goto rss_err;
+
+		++good_qps;
+	}
+
+	/* Configure RSS indirection qp */
+	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
+	if (err) {
+		mlx4_err(mdev, "Failed to reserve range for RSS "
+			       "indirection qp\n");
+		goto rss_err;
+	}
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
+	if (err) {
+		mlx4_err(mdev, "Failed to allocate RSS indirection QP\n");
+		goto reserve_err;
+	}
+	rss_map->indir_qp.event = mlx4_en_sqp_event;
+	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
+				priv->rx_ring[0].cqn, 0, &context);
+
+	/* The RSS parameters are written at byte offset 0x3c of the QP context.
+	 * NOTE(review): magic offset - confirm against the QP context layout */
+	ptr = ((void *) &context) + 0x3c;
+	rss_context = (struct mlx4_en_rss_context *) ptr;
+	/* log2 of the table size goes in the top byte, base QP number below it */
+	rss_context->base_qpn = cpu_to_be32(ilog2(rss_map->size) << 24 |
+					    (rss_map->base_qpn));
+	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
+	rss_context->hash_fn = rss_xor & 0x3;
+	rss_context->flags = rss_mask << 2;
+
+	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
+			       &rss_map->indir_qp, &rss_map->indir_state);
+	if (err)
+		goto indir_err;
+
+	return 0;
+
+	/* Error unwinding: each label undoes one stage and falls through */
+indir_err:
+	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
+		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
+	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
+	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
+reserve_err:
+	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
+rss_err:
+	for (i = 0; i < good_qps; i++) {
+		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
+			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
+		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
+		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
+	}
+	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size);
+	return err;
+}
+
+/*
+ * Undo mlx4_en_config_rss_steer(): reset and free the RSS indirection QP,
+ * then every rx QP of the RSS table, releasing both QP number ranges.
+ */
+void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_rss_map *map = &priv->rss_map;
+	struct mlx4_qp *rx_qp;
+	int n;
+
+	/* Indirection QP first */
+	mlx4_qp_modify(mdev->dev, NULL, map->indir_state,
+		       MLX4_QP_STATE_RST, NULL, 0, 0, &map->indir_qp);
+	mlx4_qp_remove(mdev->dev, &map->indir_qp);
+	mlx4_qp_free(mdev->dev, &map->indir_qp);
+	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
+
+	/* Then the per-entry rx QPs */
+	for (n = 0; n < map->size; n++) {
+		rx_qp = &map->qps[n];
+		mlx4_qp_modify(mdev->dev, NULL, map->state[n],
+			       MLX4_QP_STATE_RST, NULL, 0, 0, rx_qp);
+		mlx4_qp_remove(mdev->dev, rx_qp);
+		mlx4_qp_free(mdev->dev, rx_qp);
+	}
+	mlx4_qp_release_range(mdev->dev, map->base_qpn, map->size);
+}
+
+
+
+
+
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
new file mode 100644
index 00000000000..8592f8fb847
--- /dev/null
+++ b/drivers/net/mlx4/en_tx.c
@@ -0,0 +1,820 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <asm/page.h>
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+
+#include "mlx4_en.h"
+
+/* Upper bound applied to the inline_thold module parameter */
+enum {
+	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
+};
+
+/* Packets of at most this many bytes are candidates for being copied inline
+ * into the Tx descriptor (see is_inline()); 0 disables inlining */
+static int inline_thold __read_mostly = MAX_INLINE;
+
+/* Read-only module parameter (mode 0444) */
+module_param_named(inline_thold, inline_thold, int, 0444);
+MODULE_PARM_DESC(inline_thold, "treshold for using inline data");
+
+/*
+ * Allocate the software and hardware resources of a Tx ring.
+ *
+ * @size:   ring size in descriptor units; used as a mask base, so it is
+ *          expected to be a power of two
+ * @stride: size in bytes of one ring unit
+ *
+ * Allocates the tx_info array, the bounce buffer and the HW queue buffer
+ * (which is also mapped), then reserves and allocates the ring's QP.
+ *
+ * Returns 0 on success or an error code; on failure everything allocated so
+ * far is released and the ring's buffer pointers are reset to NULL.
+ */
+int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
+			   struct mlx4_en_tx_ring *ring, u32 size,
+			   u16 stride)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int tmp;
+	int err;
+
+	ring->size = size;
+	ring->size_mask = size - 1;
+	ring->stride = stride;
+
+	/* Clamp the module parameter to [0, MAX_INLINE].  A negative value
+	 * would be converted to a huge unsigned number when compared with
+	 * skb->len in is_inline(), making every packet look inline-able;
+	 * treat it as "inlining disabled" instead. */
+	inline_thold = min(inline_thold, MAX_INLINE);
+	if (inline_thold < 0)
+		inline_thold = 0;
+
+	spin_lock_init(&ring->comp_lock);
+
+	tmp = size * sizeof(struct mlx4_en_tx_info);
+	ring->tx_info = vmalloc(tmp);
+	if (!ring->tx_info) {
+		mlx4_err(mdev, "Failed allocating tx_info ring\n");
+		return -ENOMEM;
+	}
+	mlx4_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
+		 ring->tx_info, tmp);
+
+	/* Staging area for descriptors that wrap around the end of the ring */
+	ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+	if (!ring->bounce_buf) {
+		mlx4_err(mdev, "Failed allocating bounce buffer\n");
+		err = -ENOMEM;
+		goto err_tx;
+	}
+	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
+
+	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
+				 2 * PAGE_SIZE);
+	if (err) {
+		mlx4_err(mdev, "Failed allocating hwq resources\n");
+		goto err_bounce;
+	}
+
+	err = mlx4_en_map_buffer(&ring->wqres.buf);
+	if (err) {
+		mlx4_err(mdev, "Failed to map TX buffer\n");
+		goto err_hwq_res;
+	}
+
+	ring->buf = ring->wqres.buf.direct.buf;
+
+	mlx4_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
+		 "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
+		 ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
+
+	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn);
+	if (err) {
+		mlx4_err(mdev, "Failed reserving qp for tx ring.\n");
+		goto err_map;
+	}
+
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+	if (err) {
+		mlx4_err(mdev, "Failed allocating qp %d\n", ring->qpn);
+		goto err_reserve;
+	}
+
+	return 0;
+
+	/* Unwind in reverse order of acquisition */
+err_reserve:
+	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
+err_map:
+	mlx4_en_unmap_buffer(&ring->wqres.buf);
+err_hwq_res:
+	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+err_bounce:
+	kfree(ring->bounce_buf);
+	ring->bounce_buf = NULL;
+err_tx:
+	vfree(ring->tx_info);
+	ring->tx_info = NULL;
+	return err;
+}
+
+/*
+ * Release everything mlx4_en_create_tx_ring() set up, in reverse order:
+ * the QP and its number, the HW queue buffer, the bounce buffer and the
+ * tx_info array.
+ */
+void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
+			     struct mlx4_en_tx_ring *ring)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	mlx4_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
+
+	/* QP resources */
+	mlx4_qp_remove(mdev->dev, &ring->qp);
+	mlx4_qp_free(mdev->dev, &ring->qp);
+	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
+
+	/* HW queue buffer */
+	mlx4_en_unmap_buffer(&ring->wqres.buf);
+	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+
+	/* Host-side bookkeeping */
+	kfree(ring->bounce_buf);
+	ring->bounce_buf = NULL;
+
+	vfree(ring->tx_info);
+	ring->tx_info = NULL;
+}
+
+/*
+ * Reset a Tx ring's software state and bring its QP to the ready state.
+ *
+ * @cq:   number of the CQ that receives this ring's completions
+ * @srqn: SRQ number passed on to the QP context
+ *
+ * Returns the result of mlx4_qp_to_ready().
+ */
+int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
+			     struct mlx4_en_tx_ring *ring,
+			     int cq, int srqn)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	/* Start from an empty ring */
+	ring->cqn = cq;
+	ring->prod = 0;
+	ring->cons = 0xffffffff;
+	ring->last_nr_txbb = 1;
+	ring->poll_cnt = 0;
+	ring->blocked = 0;
+	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
+	memset(ring->buf, 0, ring->buf_size);
+
+	ring->qp_state = MLX4_QP_STATE_RST;
+	ring->doorbell_qpn = swab32(ring->qp.qpn << 8);
+
+	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
+				ring->cqn, srqn, &ring->context);
+
+	return mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
+				&ring->qp, &ring->qp_state);
+}
+
+/* Move the Tx ring's QP from its current state back to reset */
+void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
+				struct mlx4_en_tx_ring *ring)
+{
+	mlx4_qp_modify(priv->mdev->dev, NULL, ring->qp_state,
+		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
+}
+
+
+/*
+ * Release one completed (or abandoned) Tx descriptor.
+ *
+ * Unmaps the DMA mappings recorded in the descriptor's data segments, stamps
+ * the descriptor's TXBBs with the "free" pattern (carrying the ownership
+ * bit derived from 'owner') and frees the skb.
+ *
+ * @index: index of the descriptor's first TXBB in the ring
+ * @owner: ownership value to encode in the stamp
+ *
+ * Returns the number of TXBBs the descriptor occupied.
+ *
+ * NOTE(review): for packets sent inline (see is_inline()) mlx4_en_xmit()
+ * creates no DMA mapping, yet a single-fragment inline skb still has
+ * nr_frags == 1 here - confirm the fragment unmap below is never applied to
+ * such descriptors.
+ */
+static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+				struct mlx4_en_tx_ring *ring,
+				int index, u8 owner)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
+	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
+	struct sk_buff *skb = tx_info->skb;
+	struct skb_frag_struct *frag;
+	void *end = ring->buf + ring->buf_size;
+	int frags = skb_shinfo(skb)->nr_frags;
+	int i;
+	__be32 *ptr = (__be32 *)tx_desc;
+	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
+
+	/* Optimize the common case when there are no wraparounds */
+	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
+		if (tx_info->linear) {
+			pci_unmap_single(mdev->pdev,
+					 (dma_addr_t) be64_to_cpu(data->addr),
+					 be32_to_cpu(data->byte_count),
+					 PCI_DMA_TODEVICE);
+			++data;
+		}
+
+		for (i = 0; i < frags; i++) {
+			frag = &skb_shinfo(skb)->frags[i];
+			pci_unmap_page(mdev->pdev,
+				       (dma_addr_t) be64_to_cpu(data[i].addr),
+				       frag->size, PCI_DMA_TODEVICE);
+		}
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+		}
+
+	} else {
+		/* The descriptor wraps: the data segments may start beyond the
+		 * end of the ring buffer, i.e. at its beginning */
+		if ((void *) data >= end) {
+			data = (struct mlx4_wqe_data_seg *)
+					(ring->buf + ((void *) data - end));
+		}
+
+		if (tx_info->linear) {
+			pci_unmap_single(mdev->pdev,
+					 (dma_addr_t) be64_to_cpu(data->addr),
+					 be32_to_cpu(data->byte_count),
+					 PCI_DMA_TODEVICE);
+			++data;
+		}
+
+		for (i = 0; i < frags; i++) {
+			/* Check for wraparound before unmapping */
+			if ((void *) data >= end)
+				data = (struct mlx4_wqe_data_seg *) ring->buf;
+			frag = &skb_shinfo(skb)->frags[i];
+			pci_unmap_page(mdev->pdev,
+					(dma_addr_t) be64_to_cpu(data->addr),
+					 frag->size, PCI_DMA_TODEVICE);
+			/* Step to the next fragment's data segment; without
+			 * this every fragment would unmap the same address */
+			++data;
+		}
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+			if ((void *) ptr >= end) {
+				/* Past the wrap point the ownership bit flips */
+				ptr = ring->buf;
+				stamp ^= cpu_to_be32(0x80000000);
+			}
+		}
+
+	}
+	dev_kfree_skb_any(skb);
+	return tx_info->nr_txbb;
+}
+
+
+/*
+ * Free every descriptor still outstanding on a Tx ring (between the consumer
+ * and producer positions).
+ *
+ * Returns the number of descriptors freed, or 0 if the consumer is found to
+ * be ahead of the producer.
+ */
+int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int freed;
+
+	/* Skip last polled descriptor */
+	ring->cons += ring->last_nr_txbb;
+	mlx4_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
+		 ring->cons, ring->prod);
+
+	if ((u32) (ring->prod - ring->cons) > ring->size) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(priv->mdev, "Tx consumer passed producer!\n");
+		return 0;
+	}
+
+	for (freed = 0; ring->cons != ring->prod; freed++) {
+		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
+						ring->cons & ring->size_mask,
+						!!(ring->cons & ring->size));
+		ring->cons += ring->last_nr_txbb;
+	}
+
+	if (freed)
+		mlx4_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", freed);
+
+	return freed;
+}
+
+/*
+ * Fill prio_map[0..7] with the Tx ring to use for each of the 8 VLAN
+ * priorities.
+ *
+ * With ring_num <= 1 every priority maps to ring 0.  Otherwise the
+ * priorities are spread in consecutive blocks over rings 1..ring_num: each
+ * ring gets 8 / ring_num priorities and the first 8 % ring_num rings get one
+ * extra.
+ */
+void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num)
+{
+	int block;
+	int extra;
+	int num = 0;
+	u16 ring = 1;
+	int prio;
+
+	/* Nothing to spread; checking before the division below also keeps
+	 * ring_num == 0 from dividing by zero */
+	if (ring_num <= 1) {
+		for (prio = 0; prio < 8; prio++)
+			prio_map[prio] = 0;
+		return;
+	}
+
+	block = 8 / ring_num;
+	extra = 8 - (block * ring_num);
+
+	for (prio = 0; prio < 8; prio++) {
+		/* Move on to the next ring once the current one has its share:
+		 * block + 1 priorities while extras remain, block afterwards */
+		if (extra && (num == block + 1)) {
+			ring++;
+			num = 0;
+			extra--;
+		} else if (!extra && (num == block)) {
+			ring++;
+			num = 0;
+		}
+		prio_map[prio] = ring;
+		mlx4_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring);
+		num++;
+	}
+}
+
+/*
+ * Reap Tx completions for the ring served by 'cq'.
+ *
+ * The first CQE in the CQ buffer reports the index of the last completed
+ * descriptor.  All descriptors from the ring consumer up to that index are
+ * freed, the CQ and ring consumer indices are advanced, and the netdev queue
+ * is woken if this ring had stopped it and enough room is available again.
+ * Does nothing if no new completion is reported or the port is down.
+ */
+static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_cq *mcq = &cq->mcq;
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+	struct mlx4_cqe *cqe = cq->buf;
+	u16 index;
+	u16 new_index;
+	u32 txbbs_skipped = 0;
+	u32 cq_last_sav;
+
+	/* index always points to the first TXBB of the last polled descriptor */
+	index = ring->cons & ring->size_mask;
+	new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
+	if (index == new_index)
+		return;
+
+	if (!priv->port_up)
+		return;
+
+	/*
+	 * We use a two-stage loop:
+	 * - the first samples the HW-updated CQE
+	 * - the second frees TXBBs until the last sample
+	 * This lets us amortize CQE cache misses, while still polling the CQ
+	 * until it is quiescent.
+	 */
+	cq_last_sav = mcq->cons_index;
+	do {
+		do {
+			/* Skip over last polled CQE */
+			index = (index + ring->last_nr_txbb) & ring->size_mask;
+			txbbs_skipped += ring->last_nr_txbb;
+
+			/* Poll next CQE */
+			ring->last_nr_txbb = mlx4_en_free_tx_desc(
+						priv, ring, index,
+						!!((ring->cons + txbbs_skipped) &
+						   ring->size));
+			++mcq->cons_index;
+
+		} while (index != new_index);
+
+		/* Re-sample: HW may have completed more work meanwhile */
+		new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
+	} while (index != new_index);
+	AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
+			 (u32) (mcq->cons_index - cq_last_sav));
+
+	/*
+	 * To prevent CQ overflow we first update CQ consumer and only then
+	 * the ring consumer.
+	 */
+	mlx4_cq_set_ci(mcq);
+	wmb();
+	ring->cons += txbbs_skipped;
+
+	/* Wakeup Tx queue if this ring stopped it */
+	if (unlikely(ring->blocked)) {
+		if (((u32) (ring->prod - ring->cons) <=
+		     ring->size - HEADROOM - MAX_DESC_TXBBS) && !cq->armed) {
+
+			/* TODO: support multiqueue netdevs. Currently, we block
+			 * when *any* ring is full. Note that:
+			 * - 2 Tx rings can unblock at the same time and call
+			 *   netif_wake_queue(), which is OK since this
+			 *   operation is idempotent.
+			 * - We might wake the queue just after another ring
+			 *   stopped it. This is no big deal because the next
+			 *   transmission on that ring would stop the queue.
+			 */
+			ring->blocked = 0;
+			netif_wake_queue(dev);
+			priv->port_stats.wake_queue++;
+		}
+	}
+}
+
+/*
+ * Tx completion event handler: reap completions under the ring's completion
+ * lock, then either re-arm the CQ (ring still blocked) or fall back to the
+ * polling timer.
+ */
+void mlx4_en_tx_irq(struct mlx4_cq *mcq)
+{
+	struct mlx4_en_cq *tx_cq = container_of(mcq, struct mlx4_en_cq, mcq);
+	struct mlx4_en_priv *priv = netdev_priv(tx_cq->dev);
+	struct mlx4_en_tx_ring *tx_ring = &priv->tx_ring[tx_cq->ring];
+
+	spin_lock_irq(&tx_ring->comp_lock);
+
+	tx_cq->armed = 0;
+	mlx4_en_process_tx_cq(tx_cq->dev, tx_cq);
+
+	if (!tx_ring->blocked)
+		mod_timer(&tx_cq->timer, jiffies + 1);
+	else
+		mlx4_en_arm_cq(priv, tx_cq);
+
+	spin_unlock_irq(&tx_ring->comp_lock);
+}
+
+
+/*
+ * Timer callback that reaps Tx completions for one CQ.
+ * 'data' is the struct mlx4_en_cq pointer cast to unsigned long.
+ */
+void mlx4_en_poll_tx_cq(unsigned long data)
+{
+	struct mlx4_en_cq *tx_cq = (struct mlx4_en_cq *) data;
+	struct mlx4_en_priv *priv = netdev_priv(tx_cq->dev);
+	struct mlx4_en_tx_ring *tx_ring = &priv->tx_ring[tx_cq->ring];
+	u32 outstanding;
+
+	INC_PERF_COUNTER(priv->pstats.tx_poll);
+
+	netif_tx_lock(priv->dev);
+	spin_lock_irq(&tx_ring->comp_lock);
+
+	mlx4_en_process_tx_cq(tx_cq->dev, tx_cq);
+
+	/* TXBBs posted but not yet reaped, not counting the last polled one */
+	outstanding = (u32) (tx_ring->prod - tx_ring->cons -
+			     tx_ring->last_nr_txbb);
+
+	/* Packets are still in flight: re-schedule the timer so that their
+	 * completions are guaranteed to be processed */
+	if (outstanding && priv->port_up)
+		mod_timer(&tx_cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
+
+	spin_unlock_irq(&tx_ring->comp_lock);
+	netif_tx_unlock(priv->dev);
+}
+
+/*
+ * Copy a descriptor that was built in the bounce buffer into its real,
+ * wrapping location in the ring.
+ *
+ * 'copy' is the number of bytes that fit between 'index' and the end of the
+ * ring; the remaining desc_size - copy bytes land at the start of the ring.
+ * Both parts are copied backwards, one 32-bit word at a time, with a write
+ * barrier each time a TXBB boundary is reached.  The very first word of the
+ * descriptor is not copied (the second loop stops at offset 4) - presumably
+ * it holds the ownership/opcode word that mlx4_en_xmit() writes last.
+ *
+ * Returns the descriptor's location inside the ring.
+ */
+static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
+						      struct mlx4_en_tx_ring *ring,
+						      u32 index,
+						      unsigned int desc_size)
+{
+	u32 copy = (ring->size - index) * TXBB_SIZE;
+	int i;
+
+	/* Wrapped tail of the descriptor -> beginning of the ring */
+	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
+		if ((i & (TXBB_SIZE - 1)) == 0)
+			wmb();
+
+		*((u32 *) (ring->buf + i)) =
+			*((u32 *) (ring->bounce_buf + copy + i));
+	}
+
+	/* Head of the descriptor -> end of the ring, minus its first word */
+	for (i = copy - 4; i >= 4 ; i -= 4) {
+		if ((i & (TXBB_SIZE - 1)) == 0)
+			wmb();
+
+		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
+			*((u32 *) (ring->bounce_buf + i));
+	}
+
+	/* Return real descriptor location */
+	return ring->buf + index * TXBB_SIZE;
+}
+
+/*
+ * Completion housekeeping done from the transmit path for ring 'tx_ind':
+ * make sure the polling timer is pending and, once every
+ * MLX4_EN_TX_POLL_MODER packets, reap completions directly.
+ *
+ * mlx4_en_process_tx_cq() updates the ring and CQ consumer state and frees
+ * skbs, and both the interrupt handler and the timer run it under
+ * ring->comp_lock, so it must not be entered from here without that lock.
+ * A trylock is used so the transmit path never spins: if another context is
+ * already processing this CQ, the poll is simply skipped.
+ */
+static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
+{
+	struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind];
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind];
+	unsigned long flags;
+
+	/* If we don't have a pending timer, set one up to catch our recent
+	   post in case the interface becomes idle */
+	if (!timer_pending(&cq->timer))
+		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
+
+	/* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
+	if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) {
+		if (spin_trylock_irqsave(&ring->comp_lock, flags)) {
+			mlx4_en_process_tx_cq(priv->dev, cq);
+			spin_unlock_irqrestore(&ring->comp_lock, flags);
+		}
+	}
+}
+
+/*
+ * Return the kernel virtual address of the data in the skb's first page
+ * fragment, or NULL if the page has no kernel address.
+ */
+static void *get_frag_ptr(struct sk_buff *skb)
+{
+	struct skb_frag_struct *first = &skb_shinfo(skb)->frags[0];
+	void *base = page_address(first->page);
+
+	if (likely(base))
+		return base + first->page_offset;
+
+	return NULL;
+}
+
+/*
+ * Decide whether 'skb' can be sent as inline data.
+ *
+ * Inlining requires a non-zero inline_thold, a non-GSO skb no longer than
+ * inline_thold and at most one page fragment whose data is addressable.
+ * When the skb has exactly one fragment and 'pfrag' is not NULL, the
+ * fragment's data address is stored in *pfrag.
+ *
+ * Returns 1 if the skb can be inlined, 0 otherwise.
+ */
+static int is_inline(struct sk_buff *skb, void **pfrag)
+{
+	void *frag_data;
+
+	if (!inline_thold || skb_is_gso(skb) || skb->len > inline_thold)
+		return 0;
+
+	/* Purely linear skb: nothing else to check */
+	if (!skb_shinfo(skb)->nr_frags)
+		return 1;
+
+	/* More than one fragment is never inlined */
+	if (skb_shinfo(skb)->nr_frags != 1)
+		return 0;
+
+	frag_data = get_frag_ptr(skb);
+	if (!frag_data)
+		return 0;
+
+	if (pfrag)
+		*pfrag = frag_data;
+
+	return 1;
+}
+
+/*
+ * Size of the descriptor needed to carry 'skb' inline, rounded up to 16
+ * bytes: control segment, packet data and one inline header - or two
+ * headers when the data does not fit within MLX4_INLINE_ALIGN.
+ */
+static int inline_size(struct sk_buff *skb)
+{
+	size_t one_seg = skb->len + CTRL_SIZE +
+			 sizeof(struct mlx4_wqe_inline_seg);
+
+	if (one_seg > MLX4_INLINE_ALIGN)
+		return ALIGN(one_seg + sizeof(struct mlx4_wqe_inline_seg), 16);
+
+	return ALIGN(one_seg, 16);
+}
+
+/*
+ * Compute the descriptor size in bytes needed to send 'skb' and store the
+ * LSO header length (0 for non-GSO packets) in *lso_header_size.
+ *
+ * Returns the size, or 0 if the packet cannot be sent; in that case the skb
+ * has already been freed.
+ */
+static int get_real_size(struct sk_buff *skb, struct net_device *dev,
+			 int *lso_header_size)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int size;
+
+	if (!skb_is_gso(skb)) {
+		*lso_header_size = 0;
+		if (is_inline(skb, NULL))
+			return inline_size(skb);
+
+		/* One data segment per fragment plus one for the linear part */
+		return CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE;
+	}
+
+	*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
+		ALIGN(*lso_header_size + 4, DS_SIZE);
+
+	if (unlikely(*lso_header_size != skb_headlen(skb))) {
+		/* Headers that spill out of the linear area are rejected */
+		if (*lso_header_size >= skb_headlen(skb)) {
+			if (netif_msg_tx_err(priv))
+				mlx4_warn(mdev, "Non-linear headers\n");
+			dev_kfree_skb_any(skb);
+			return 0;
+		}
+
+		/* The linear buffer holds payload too: it needs a segment */
+		size += DS_SIZE;
+	}
+
+	if (unlikely(*lso_header_size > MAX_LSO_HDR_SIZE)) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(mdev, "LSO header size too big\n");
+		dev_kfree_skb_any(skb);
+		return 0;
+	}
+
+	return size;
+}
+
+/*
+ * Copy the packet data straight into the Tx descriptor as inline segment(s).
+ *
+ * 'spc' is the room left for data within the first MLX4_INLINE_ALIGN bytes
+ * after the control segment and one inline header.  A packet that fits is
+ * written as a single inline segment; otherwise the first 'spc' bytes fill
+ * the first segment and the rest goes into a second one, whose byte count is
+ * published only after a write barrier.
+ *
+ * 'fragptr' is the address of the skb's single page fragment as obtained
+ * through is_inline(); it is used when part of the data lives outside the
+ * linear area.  The control segment's VLAN and size fields are written at
+ * the end.  'tx_ind' is not used.
+ */
+static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
+			     int real_size, u16 *vlan_tag, int tx_ind, void *fragptr)
+{
+	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
+	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
+
+	if (skb->len <= spc) {
+		/* Everything fits in one inline segment (bit 31 set in the
+		 * byte count on every inline segment written here) */
+		inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
+		skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
+		if (skb_shinfo(skb)->nr_frags)
+			memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
+			       skb_shinfo(skb)->frags[0].size);
+
+	} else {
+		inl->byte_count = cpu_to_be32(1 << 31 | spc);
+		if (skb_headlen(skb) <= spc) {
+			/* Linear data fits in the first segment; top it up from
+			 * the fragment and put the rest in the second segment */
+			skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
+			if (skb_headlen(skb) < spc) {
+				memcpy(((void *)(inl + 1)) + skb_headlen(skb),
+					fragptr, spc - skb_headlen(skb));
+				fragptr +=  spc - skb_headlen(skb);
+			}
+			inl = (void *) (inl + 1) + spc;
+			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
+		} else {
+			/* Linear data itself spans both segments; fragment data
+			 * (if any) follows it in the second segment */
+			skb_copy_from_linear_data(skb, inl + 1, spc);
+			inl = (void *) (inl + 1) + spc;
+			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
+					skb_headlen(skb) - spc);
+			if (skb_shinfo(skb)->nr_frags)
+				memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
+					fragptr, skb_shinfo(skb)->frags[0].size);
+		}
+
+		/* Data of the second segment must be written before its byte
+		 * count makes it valid */
+		wmb();
+		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
+	}
+	tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag);
+	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag);
+	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
+}
+
+/*
+ * Extract the VLAN tag to transmit with 'skb' into *vlan_tag (0 if none) and
+ * return the index of the Tx ring to use: the ring mapped to the tag's
+ * priority bits, or ring 0 for untagged traffic.
+ */
+static int get_vlan_info(struct mlx4_en_priv *priv, struct sk_buff *skb,
+			 u16 *vlan_tag)
+{
+	if (!priv->vlgrp || !vlan_tx_tag_present(skb)) {
+		*vlan_tag = 0;
+		return 0;
+	}
+
+	*vlan_tag = vlan_tx_tag_get(skb);
+
+	/* The top three bits of the tag carry the priority */
+	return priv->tx_prio_map[*vlan_tag >> 13];
+}
+
+/*
+ * Transmit entry point: build a Tx descriptor for 'skb' on the ring selected
+ * by its VLAN priority, pass ownership to the hardware and ring the
+ * doorbell.
+ *
+ * Returns 0 once the descriptor has been posted, NETDEV_TX_OK when the skb
+ * was dropped (it is freed here or in get_real_size()), and NETDEV_TX_BUSY
+ * after stopping the queue because the selected ring lacks room.
+ */
+int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_ring *ring;
+	struct mlx4_en_cq *cq;
+	struct mlx4_en_tx_desc *tx_desc;
+	struct mlx4_wqe_data_seg *data;
+	struct skb_frag_struct *frag;
+	struct mlx4_en_tx_info *tx_info;
+	int tx_ind = 0;
+	int nr_txbb;
+	int desc_size;
+	int real_size;
+	dma_addr_t dma;
+	u32 index;
+	__be32 op_own;
+	u16 vlan_tag;
+	int i;
+	int lso_header_size;
+	void *fragptr;
+
+	if (unlikely(!skb->len)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+	/* A zero size means the skb was rejected and already freed */
+	real_size = get_real_size(skb, dev, &lso_header_size);
+	if (unlikely(!real_size))
+		return NETDEV_TX_OK;
+
+	/* Align descriptor to TXBB size */
+	desc_size = ALIGN(real_size, TXBB_SIZE);
+	nr_txbb = desc_size / TXBB_SIZE;
+	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(mdev, "Oversized header or SG list\n");
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	tx_ind = get_vlan_info(priv, skb, &vlan_tag);
+	ring = &priv->tx_ring[tx_ind];
+
+	/* Check available TXBBs And 2K spare for prefetch */
+	if (unlikely(((int)(ring->prod - ring->cons)) >
+		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+		/* every full Tx ring stops queue.
+		 * TODO: implement multi-queue support (per-queue stop) */
+		netif_stop_queue(dev);
+		ring->blocked = 1;
+		priv->port_stats.queue_stopped++;
+
+		/* Use interrupts to find out when queue opened */
+		cq = &priv->tx_cq[tx_ind];
+		mlx4_en_arm_cq(priv, cq);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Now that we know what Tx ring to use */
+	if (unlikely(!priv->port_up)) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(mdev, "xmit: port down!\n");
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Track current inflight packets for performance analysis */
+	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+			 (u32) (ring->prod - ring->cons - 1));
+
+	/* Packet is good - grab an index and transmit it */
+	index = ring->prod & ring->size_mask;
+
+	/* See if we have enough space for whole descriptor TXBB for setting
+	 * SW ownership on next descriptor; if not, use a bounce buffer. */
+	if (likely(index + nr_txbb <= ring->size))
+		tx_desc = ring->buf + index * TXBB_SIZE;
+	else
+		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
+
+	/* Save skb in tx_info ring */
+	tx_info = &ring->tx_info[index];
+	tx_info->skb = skb;
+	tx_info->nr_txbb = nr_txbb;
+
+	/* Prepare ctrl segment apart opcode+ownership, which depends on
+	 * whether LSO is used */
+	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
+	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
+	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
+	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+						MLX4_WQE_CTRL_SOLICITED);
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
+		priv->port_stats.tx_chksum_offload++;
+	}
+
+	/* Handle LSO (TSO) packets */
+	if (lso_header_size) {
+		/* Mark opcode as LSO; the ownership bit follows the wrap
+		 * parity of the producer index */
+		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
+			((ring->prod & ring->size) ?
+				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+		/* Fill in the LSO prefix */
+		tx_desc->lso.mss_hdr_size = cpu_to_be32(
+			skb_shinfo(skb)->gso_size << 16 | lso_header_size);
+
+		/* Copy headers;
+		 * note that we already verified that it is linear */
+		memcpy(tx_desc->lso.header, skb->data, lso_header_size);
+		data = ((void *) &tx_desc->lso +
+			ALIGN(lso_header_size + 4, DS_SIZE));
+
+		priv->port_stats.tso_packets++;
+		/* i = number of segments the payload is split into; each
+		 * segment after the first repeats the headers on the wire */
+		i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) +
+			!!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size);
+		ring->bytes += skb->len + (i - 1) * lso_header_size;
+		ring->packets += i;
+	} else {
+		/* Normal (Non LSO) packet */
+		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+			((ring->prod & ring->size) ?
+			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+		data = &tx_desc->data;
+		ring->bytes += max(skb->len, (unsigned int) ETH_ZLEN);
+		ring->packets++;
+
+	}
+	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
+
+
+	/* valid only for non-inline segments */
+	tx_info->data_offset = (void *) data - (void *) tx_desc;
+
+	tx_info->linear = (lso_header_size < skb_headlen(skb) && !is_inline(skb, NULL)) ? 1 : 0;
+	/* Point at the last data segment: segments are filled back to front */
+	data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+
+	if (!is_inline(skb, &fragptr)) {
+		/* Map fragments */
+		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
+			frag = &skb_shinfo(skb)->frags[i];
+			dma = pci_map_page(mdev->dev->pdev, frag->page, frag->page_offset,
+					   frag->size, PCI_DMA_TODEVICE);
+			data->addr = cpu_to_be64(dma);
+			data->lkey = cpu_to_be32(mdev->mr.key);
+			/* Address and key must be visible before the byte count */
+			wmb();
+			data->byte_count = cpu_to_be32(frag->size);
+			--data;
+		}
+
+		/* Map linear part */
+		if (tx_info->linear) {
+			dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+					     skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
+			data->addr = cpu_to_be64(dma);
+			data->lkey = cpu_to_be32(mdev->mr.key);
+			wmb();
+			data->byte_count = cpu_to_be32(skb_headlen(skb) - lso_header_size);
+		}
+	} else
+		build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);
+
+	ring->prod += nr_txbb;
+
+	/* If we used a bounce buffer then copy descriptor back into place */
+	if (tx_desc == (struct mlx4_en_tx_desc *) ring->bounce_buf)
+		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
+
+	/* Run destructor before passing skb to HW */
+	if (likely(!skb_shared(skb)))
+		skb_orphan(skb);
+
+	/* Ensure new descriptor hits memory
+	 * before setting ownership of this descriptor to HW */
+	wmb();
+	tx_desc->ctrl.owner_opcode = op_own;
+
+	/* Ring doorbell! */
+	wmb();
+	writel(ring->doorbell_qpn, mdev->uar_map + MLX4_SEND_DOORBELL);
+	dev->trans_start = jiffies;
+
+	/* Poll CQ here */
+	mlx4_en_xmit_poll(priv, tx_ind);
+
+	return 0;
+}
+
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 8a8b56135a5..de169338cd9 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -558,7 +558,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 	int i;
 
 	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
-			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs);
+			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 7e32955da98..be09fdb79cb 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -88,6 +88,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
 		[ 8] = "P_Key violation counter",
 		[ 9] = "Q_Key violation counter",
 		[10] = "VMM",
+		[12] = "DPDP",
 		[16] = "MW support",
 		[17] = "APM support",
 		[18] = "Atomic ops support",
@@ -346,7 +347,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
 			dev_cap->max_vl[i]	   = field >> 4;
 			MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
-			dev_cap->max_mtu[i]	   = field >> 4;
+			dev_cap->ib_mtu[i]	   = field >> 4;
 			dev_cap->max_port_width[i] = field & 0xf;
 			MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET);
 			dev_cap->max_gids[i]	   = 1 << (field & 0xf);
@@ -354,9 +355,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			dev_cap->max_pkeys[i]	   = 1 << (field & 0xf);
 		}
 	} else {
+#define QUERY_PORT_SUPPORTED_TYPE_OFFSET	0x00
 #define QUERY_PORT_MTU_OFFSET			0x01
+#define QUERY_PORT_ETH_MTU_OFFSET		0x02
 #define QUERY_PORT_WIDTH_OFFSET			0x06
 #define QUERY_PORT_MAX_GID_PKEY_OFFSET		0x07
+#define QUERY_PORT_MAC_OFFSET			0x08
+#define QUERY_PORT_MAX_MACVLAN_OFFSET		0x0a
 #define QUERY_PORT_MAX_VL_OFFSET		0x0b
 
 		for (i = 1; i <= dev_cap->num_ports; ++i) {
@@ -365,8 +370,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			if (err)
 				goto out;
 
+			MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+			dev_cap->supported_port_types[i] = field & 3;
 			MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
-			dev_cap->max_mtu[i]	   = field & 0xf;
+			dev_cap->ib_mtu[i]	   = field & 0xf;
 			MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
 			dev_cap->max_port_width[i] = field & 0xf;
 			MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET);
@@ -374,6 +381,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			dev_cap->max_pkeys[i]	   = 1 << (field & 0xf);
 			MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET);
 			dev_cap->max_vl[i]	   = field & 0xf;
+			MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET);
+			dev_cap->log_max_macs[i]  = field & 0xf;
+			dev_cap->log_max_vlans[i] = field >> 4;
+			MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET);
+			MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET);
 		}
 	}
 
@@ -407,7 +419,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
 		 dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz);
 	mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n",
-		 dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu[1],
+		 dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1],
 		 dev_cap->max_port_width[1]);
 	mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n",
 		 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
@@ -819,7 +831,7 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
 		flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT;
 		MLX4_PUT(inbox, flags,		  INIT_PORT_FLAGS_OFFSET);
 
-		field = 128 << dev->caps.mtu_cap[port];
+		field = 128 << dev->caps.ib_mtu_cap[port];
 		MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET);
 		field = dev->caps.gid_table_len[port];
 		MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET);
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index decbb5c2ad4..526d7f30c04 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -66,11 +66,13 @@ struct mlx4_dev_cap {
 	int local_ca_ack_delay;
 	int num_ports;
 	u32 max_msg_sz;
-	int max_mtu[MLX4_MAX_PORTS + 1];
+	int ib_mtu[MLX4_MAX_PORTS + 1];
 	int max_port_width[MLX4_MAX_PORTS + 1];
 	int max_vl[MLX4_MAX_PORTS + 1];
 	int max_gids[MLX4_MAX_PORTS + 1];
 	int max_pkeys[MLX4_MAX_PORTS + 1];
+	u64 def_mac[MLX4_MAX_PORTS + 1];
+	u16 eth_mtu[MLX4_MAX_PORTS + 1];
 	u16 stat_rate_support;
 	u32 flags;
 	int reserved_uars;
@@ -102,6 +104,9 @@ struct mlx4_dev_cap {
 	u32 reserved_lkey;
 	u64 max_icm_sz;
 	int max_gso_sz;
+	u8  supported_port_types[MLX4_MAX_PORTS + 1];
+	u8  log_max_macs[MLX4_MAX_PORTS + 1];
+	u8  log_max_vlans[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_adapter {
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 1252a919de2..468921b8f4b 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -85,6 +85,57 @@ static struct mlx4_profile default_profile = {
 	.num_mtt	= 1 << 20,
 };
 
+static int log_num_mac = 2;
+module_param_named(log_num_mac, log_num_mac, int, 0444);
+MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
+
+static int log_num_vlan;
+module_param_named(log_num_vlan, log_num_vlan, int, 0444);
+MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
+
+static int use_prio;
+module_param_named(use_prio, use_prio, bool, 0444);
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
+		  "(0/1, default 0)");
+
+static int mlx4_check_port_params(struct mlx4_dev *dev,
+				  enum mlx4_port_type *port_type)
+{
+	int i;
+
+	for (i = 0; i < dev->caps.num_ports - 1; i++) {
+		if (port_type[i] != port_type[i+1] &&
+		    !(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+			mlx4_err(dev, "Only same port types supported "
+				 "on this HCA, aborting.\n");
+			return -EINVAL;
+		}
+	}
+	/* port_type[1] is only filled in by callers on dual-port devices */
+	if (dev->caps.num_ports > 1 && port_type[0] == MLX4_PORT_TYPE_ETH &&
+	    port_type[1] == MLX4_PORT_TYPE_IB) {
+		mlx4_err(dev, "eth-ib configuration is not supported.\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
+			mlx4_err(dev, "Requested port type for port %d is not "
+				      "supported on this HCA\n", i + 1);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static void mlx4_set_port_mask(struct mlx4_dev *dev)
+{
+	int i;
+
+	dev->caps.port_mask = 0;
+	for (i = 1; i <= dev->caps.num_ports; ++i)
+		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
+			dev->caps.port_mask |= 1 << (i - 1);
+}
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
 	int err;
@@ -120,10 +171,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.num_ports	     = dev_cap->num_ports;
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
-		dev->caps.mtu_cap[i]	    = dev_cap->max_mtu[i];
+		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
 		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
 		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
 		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
+		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
+		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
+		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
 	}
 
 	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
@@ -134,7 +188,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
 	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
 	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
-	dev->caps.reserved_qps	     = dev_cap->reserved_qps;
 	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
 	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
 	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
@@ -163,9 +216,138 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
+	dev->caps.log_num_macs  = log_num_mac;
+	dev->caps.log_num_vlans = log_num_vlan;
+	dev->caps.log_num_prios = use_prio ? 3 : 0;
+
+	for (i = 1; i <= dev->caps.num_ports; ++i) {
+		if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
+			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
+		else
+			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+
+		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
+			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
+			mlx4_warn(dev, "Requested number of MACs is too much "
+				  "for port %d, reducing to %d.\n",
+				  i, 1 << dev->caps.log_num_macs);
+		}
+		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+			mlx4_warn(dev, "Requested number of VLANs is too much "
+				  "for port %d, reducing to %d.\n",
+				  i, 1 << dev->caps.log_num_vlans);
+		}
+	}
+
+	mlx4_set_port_mask(dev);
+
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
+		(1 << dev->caps.log_num_macs) *
+		(1 << dev->caps.log_num_vlans) *
+		(1 << dev->caps.log_num_prios) *
+		dev->caps.num_ports;
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
+
+	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+
 	return 0;
 }
 
+/*
+ * Change the port configuration of the device.
+ * Every user of this function must hold the port mutex.
+ */
+static int mlx4_change_port_types(struct mlx4_dev *dev,
+				  enum mlx4_port_type *port_types)
+{
+	int err = 0;
+	int change = 0;
+	int port;
+
+	for (port = 0; port <  dev->caps.num_ports; port++) {
+		if (port_types[port] != dev->caps.port_type[port + 1]) {
+			change = 1;
+			dev->caps.port_type[port + 1] = port_types[port];
+		}
+	}
+	if (change) {
+		mlx4_unregister_device(dev);
+		for (port = 1; port <= dev->caps.num_ports; port++) {
+			mlx4_CLOSE_PORT(dev, port);
+			err = mlx4_SET_PORT(dev, port);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, "
+					      "aborting\n", port);
+				goto out;
+			}
+		}
+		mlx4_set_port_mask(dev);
+		err = mlx4_register_device(dev);
+	}
+
+out:
+	return err;
+}
+
+static ssize_t show_port_type(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+						   port_attr);
+	struct mlx4_dev *mdev = info->dev;
+
+	return sprintf(buf, "%s\n",
+		       mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB ?
+		       "ib" : "eth");
+}
+
+static ssize_t set_port_type(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+						   port_attr);
+	struct mlx4_dev *mdev = info->dev;
+	struct mlx4_priv *priv = mlx4_priv(mdev);
+	enum mlx4_port_type types[MLX4_MAX_PORTS];
+	int i;
+	int err = 0;
+
+	if (!strcmp(buf, "ib\n"))
+		info->tmp_type = MLX4_PORT_TYPE_IB;
+	else if (!strcmp(buf, "eth\n"))
+		info->tmp_type = MLX4_PORT_TYPE_ETH;
+	else {
+		mlx4_err(mdev, "%s is not supported port type\n", buf);
+		return -EINVAL;
+	}
+
+	mutex_lock(&priv->port_mutex);
+	for (i = 0; i < mdev->caps.num_ports; i++)
+		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
+					mdev->caps.port_type[i+1];
+
+	err = mlx4_check_port_params(mdev, types);
+	if (err)
+		goto out;
+
+	for (i = 1; i <= mdev->caps.num_ports; i++)
+		priv->port[i].tmp_type = 0;
+
+	err = mlx4_change_port_types(mdev, types);
+
+out:
+	mutex_unlock(&priv->port_mutex);
+	return err ? err : count;
+}
+
 static int mlx4_load_fw(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -211,7 +393,8 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 				  ((u64) (MLX4_CMPT_TYPE_QP *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
 				  cmpt_entry_sz, dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
 	if (err)
 		goto err;
 
@@ -336,7 +519,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 				  init_hca->qpc_base,
 				  dev_cap->qpc_entry_sz,
 				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
 		goto err_unmap_dmpt;
@@ -346,7 +530,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 				  init_hca->auxc_base,
 				  dev_cap->aux_entry_sz,
 				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
 		goto err_unmap_qp;
@@ -356,7 +541,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 				  init_hca->altc_base,
 				  dev_cap->altc_entry_sz,
 				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
 		goto err_unmap_auxc;
@@ -366,7 +552,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 				  init_hca->rdmarc_base,
 				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
 				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
 		goto err_unmap_altc;
@@ -565,6 +752,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
+	int port;
 
 	err = mlx4_init_uar_table(dev);
 	if (err) {
@@ -663,8 +851,20 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		goto err_qp_table_free;
 	}
 
+	for (port = 1; port <= dev->caps.num_ports; port++) {
+		err = mlx4_SET_PORT(dev, port);
+		if (err) {
+			mlx4_err(dev, "Failed to set port %d, aborting\n",
+				port);
+			goto err_mcg_table_free;
+		}
+	}
+
 	return 0;
 
+err_mcg_table_free:
+	mlx4_cleanup_mcg_table(dev);
+
 err_qp_table_free:
 	mlx4_cleanup_qp_table(dev);
 
@@ -728,11 +928,45 @@ no_msi:
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
+static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	int err = 0;
+
+	info->dev = dev;
+	info->port = port;
+	mlx4_init_mac_table(dev, &info->mac_table);
+	mlx4_init_vlan_table(dev, &info->vlan_table);
+
+	sprintf(info->dev_name, "mlx4_port%d", port);
+	info->port_attr.attr.name = info->dev_name;
+	info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+	info->port_attr.show      = show_port_type;
+	info->port_attr.store     = set_port_type;
+
+	err = device_create_file(&dev->pdev->dev, &info->port_attr);
+	if (err) {
+		mlx4_err(dev, "Failed to create file for port %d\n", port);
+		info->port = -1;
+	}
+
+	return err;
+}
+
+static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
+{
+	if (info->port < 0)
+		return;
+
+	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+}
+
 static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx4_priv *priv;
 	struct mlx4_dev *dev;
 	int err;
+	int port;
 
 	printk(KERN_INFO PFX "Initializing %s\n",
 	       pci_name(pdev));
@@ -807,6 +1041,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
 
+	mutex_init(&priv->port_mutex);
+
 	INIT_LIST_HEAD(&priv->pgdir_list);
 	mutex_init(&priv->pgdir_mutex);
 
@@ -842,15 +1078,24 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_close;
 
+	for (port = 1; port <= dev->caps.num_ports; port++) {
+		err = mlx4_init_port_info(dev, port);
+		if (err)
+			goto err_port;
+	}
+
 	err = mlx4_register_device(dev);
 	if (err)
-		goto err_cleanup;
+		goto err_port;
 
 	pci_set_drvdata(pdev, dev);
 
 	return 0;
 
-err_cleanup:
+err_port:
+	for (--port; port >= 1; --port)	/* undo only ports already set up */
+		mlx4_cleanup_port_info(&priv->port[port]);
+
 	mlx4_cleanup_mcg_table(dev);
 	mlx4_cleanup_qp_table(dev);
 	mlx4_cleanup_srq_table(dev);
@@ -907,8 +1152,10 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	if (dev) {
 		mlx4_unregister_device(dev);
 
-		for (p = 1; p <= dev->caps.num_ports; ++p)
+		for (p = 1; p <= dev->caps.num_ports; p++) {
+			mlx4_cleanup_port_info(&priv->port[p]);
 			mlx4_CLOSE_PORT(dev, p);
+		}
 
 		mlx4_cleanup_mcg_table(dev);
 		mlx4_cleanup_qp_table(dev);
@@ -948,6 +1195,8 @@ static struct pci_device_id mlx4_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
 	{ PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
 	{ PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
 	{ 0, }
 };
 
@@ -960,10 +1209,28 @@ static struct pci_driver mlx4_driver = {
 	.remove		= __devexit_p(mlx4_remove_one)
 };
 
+static int __init mlx4_verify_params(void)
+{
+	if ((log_num_mac < 0) || (log_num_mac > 7)) {
+		printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n", log_num_mac);
+		return -1;
+	}
+
+	if ((log_num_vlan < 0) || (log_num_vlan > 7)) {
+		printk(KERN_WARNING "mlx4_core: bad num_vlan: %d\n", log_num_vlan);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int __init mlx4_init(void)
 {
 	int ret;
 
+	if (mlx4_verify_params())
+		return -EINVAL;
+
 	ret = mlx4_catas_init();
 	if (ret)
 		return ret;
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index c83f88ce073..592c01ae2c5 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -368,8 +368,8 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev)
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
 
-	err = mlx4_bitmap_init(&priv->mcg_table.bitmap,
-			       dev->caps.num_amgms, dev->caps.num_amgms - 1, 0);
+	err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms,
+			       dev->caps.num_amgms - 1, 0, 0);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 5337e3ac3e7..fa431fad0ee 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -111,6 +111,7 @@ struct mlx4_bitmap {
 	u32			last;
 	u32			top;
 	u32			max;
+	u32                     reserved_top;
 	u32			mask;
 	spinlock_t		lock;
 	unsigned long	       *table;
@@ -251,6 +252,38 @@ struct mlx4_catas_err {
 	struct list_head	list;
 };
 
+#define MLX4_MAX_MAC_NUM	128
+#define MLX4_MAC_TABLE_SIZE	(MLX4_MAX_MAC_NUM << 3)
+
+struct mlx4_mac_table {
+	__be64			entries[MLX4_MAX_MAC_NUM];
+	int			refs[MLX4_MAX_MAC_NUM];
+	struct mutex		mutex;
+	int			total;
+	int			max;
+};
+
+#define MLX4_MAX_VLAN_NUM	128
+#define MLX4_VLAN_TABLE_SIZE	(MLX4_MAX_VLAN_NUM << 2)
+
+struct mlx4_vlan_table {
+	__be32			entries[MLX4_MAX_VLAN_NUM];
+	int			refs[MLX4_MAX_VLAN_NUM];
+	struct mutex		mutex;
+	int			total;
+	int			max;
+};
+
+struct mlx4_port_info {
+	struct mlx4_dev	       *dev;
+	int			port;
+	char			dev_name[16];
+	struct device_attribute port_attr;
+	enum mlx4_port_type	tmp_type;
+	struct mlx4_mac_table	mac_table;
+	struct mlx4_vlan_table	vlan_table;
+};
+
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
@@ -279,6 +312,8 @@ struct mlx4_priv {
 
 	struct mlx4_uar		driver_uar;
 	void __iomem	       *kar;
+	struct mlx4_port_info	port[MLX4_MAX_PORTS + 1];
+	struct mutex		port_mutex;
 };
 
 static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -288,7 +323,10 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
 
 u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
 void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
-int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
+		     u32 reserved_bot, u32 reserved_top);
 void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
 
 int mlx4_reset(struct mlx4_dev *dev);
@@ -346,4 +384,9 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 
 void mlx4_handle_catas_err(struct mlx4_dev *dev);
 
+void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
+void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+
 #endif /* MLX4_H */
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
new file mode 100644
index 00000000000..11fb17c6e97
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -0,0 +1,561 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _MLX4_EN_H_
+#define _MLX4_EN_H_
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/inet_lro.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/srq.h>
+#include <linux/mlx4/doorbell.h>
+
+#include "en_port.h"
+
+#define DRV_NAME	"mlx4_en"
+#define DRV_VERSION	"1.4.0"
+#define DRV_RELDATE	"Sep 2008"
+
+
+#define MLX4_EN_MSG_LEVEL	(NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
+/* Wrapped in do/while (0) so the macro is safe in unbraced if/else bodies */
+#define mlx4_dbg(mlevel, priv, format, arg...)	\
+do { if (NETIF_MSG_##mlevel & (priv)->msg_enable) \
+	printk(KERN_DEBUG "%s %s: " format , DRV_NAME ,\
+		(&(priv)->mdev->pdev->dev)->bus_id , ## arg); } while (0)
+
+#define mlx4_err(mdev, format, arg...) \
+	printk(KERN_ERR "%s %s: " format , DRV_NAME ,\
+		(&mdev->pdev->dev)->bus_id , ## arg)
+#define mlx4_info(mdev, format, arg...) \
+	printk(KERN_INFO "%s %s: " format , DRV_NAME ,\
+		(&mdev->pdev->dev)->bus_id , ## arg)
+#define mlx4_warn(mdev, format, arg...) \
+	printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\
+		(&mdev->pdev->dev)->bus_id , ## arg)
+
+/*
+ * Device constants
+ */
+
+
+#define MLX4_EN_PAGE_SHIFT	12
+#define MLX4_EN_PAGE_SIZE	(1 << MLX4_EN_PAGE_SHIFT)
+#define MAX_TX_RINGS		16
+#define MAX_RX_RINGS		16
+#define MAX_RSS_MAP_SIZE	64
+#define RSS_FACTOR		2
+#define TXBB_SIZE		64
+#define HEADROOM		(2048 / TXBB_SIZE + 1)
+#define MAX_LSO_HDR_SIZE	92
+#define STAMP_STRIDE		64
+#define STAMP_DWORDS		(STAMP_STRIDE / 4)
+#define STAMP_SHIFT		31
+#define STAMP_VAL		0x7fffffff
+#define STATS_DELAY		(HZ / 4)
+
+/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
+#define MAX_DESC_SIZE		512
+#define MAX_DESC_TXBBS		(MAX_DESC_SIZE / TXBB_SIZE)
+
+/*
+ * OS related constants and tunables
+ */
+
+#define MLX4_EN_WATCHDOG_TIMEOUT	(15 * HZ)
+
+#define MLX4_EN_ALLOC_ORDER	2
+#define MLX4_EN_ALLOC_SIZE	(PAGE_SIZE << MLX4_EN_ALLOC_ORDER)
+
+#define MLX4_EN_MAX_LRO_DESCRIPTORS	32
+
+/* Receive fragment sizes; we use at most 4 fragments (for 9600 byte MTU
+ * and 4K allocations) */
+enum {
+	FRAG_SZ0 = 512 - NET_IP_ALIGN,
+	FRAG_SZ1 = 1024,
+	FRAG_SZ2 = 4096,
+	FRAG_SZ3 = MLX4_EN_ALLOC_SIZE
+};
+#define MLX4_EN_MAX_RX_FRAGS	4
+
+/* Minimum ring size for our page-allocation scheme to work */
+#define MLX4_EN_MIN_RX_SIZE	(MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES)
+#define MLX4_EN_MIN_TX_SIZE	(4096 / TXBB_SIZE)
+
+#define MLX4_EN_TX_RING_NUM		9
+#define MLX4_EN_DEF_TX_RING_SIZE	1024
+#define MLX4_EN_DEF_RX_RING_SIZE  	1024
+
+/* Target number of bytes to coalesce with interrupt moderation */
+#define MLX4_EN_RX_COAL_TARGET	0x20000
+#define MLX4_EN_RX_COAL_TIME	0x10
+
+#define MLX4_EN_TX_COAL_PKTS	5
+#define MLX4_EN_TX_COAL_TIME	0x80
+
+#define MLX4_EN_RX_RATE_LOW		400000
+#define MLX4_EN_RX_COAL_TIME_LOW	0
+#define MLX4_EN_RX_RATE_HIGH		450000
+#define MLX4_EN_RX_COAL_TIME_HIGH	128
+#define MLX4_EN_RX_SIZE_THRESH		1024
+#define MLX4_EN_RX_RATE_THRESH		(1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
+#define MLX4_EN_SAMPLE_INTERVAL		0
+
+#define MLX4_EN_AUTO_CONF	0xffff
+
+#define MLX4_EN_DEF_RX_PAUSE	1
+#define MLX4_EN_DEF_TX_PAUSE	1
+
+/* Interval between successive polls in the Tx routine when polling is used
+   instead of interrupts (in per-core Tx rings) - should be power of 2 */
+#define MLX4_EN_TX_POLL_MODER	16
+#define MLX4_EN_TX_POLL_TIMEOUT	(HZ / 4)
+
+#define ETH_LLC_SNAP_SIZE	8
+
+#define SMALL_PACKET_SIZE      (256 - NET_IP_ALIGN)
+#define HEADER_COPY_SIZE       (128 - NET_IP_ALIGN)
+
+#define MLX4_EN_MIN_MTU		46
+#define ETH_BCAST		0xffffffffffffULL
+
+#ifdef MLX4_EN_PERF_STAT
+/* Number of samples to 'average' */
+#define AVG_SIZE			128
+#define AVG_FACTOR			1024
+#define NUM_PERF_STATS			NUM_PERF_COUNTERS
+
+#define INC_PERF_COUNTER(cnt)		(++(cnt))
+#define ADD_PERF_COUNTER(cnt, add)	((cnt) += (add))
+#define AVG_PERF_COUNTER(cnt, sample) \
+	((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE)
+#define GET_PERF_COUNTER(cnt)		(cnt)
+#define GET_AVG_PERF_COUNTER(cnt)	((cnt) / AVG_FACTOR)
+
+#else
+
+#define NUM_PERF_STATS			0
+#define INC_PERF_COUNTER(cnt)		do {} while (0)
+#define ADD_PERF_COUNTER(cnt, add)	do {} while (0)
+#define AVG_PERF_COUNTER(cnt, sample)	do {} while (0)
+#define GET_PERF_COUNTER(cnt)		(0)
+#define GET_AVG_PERF_COUNTER(cnt)	(0)
+#endif /* MLX4_EN_PERF_STAT */
+
+/*
+ * Configurables
+ */
+
+enum cq_type {
+	RX = 0,
+	TX = 1,
+};
+
+
+/*
+ * Useful macros
+ */
+#define ROUNDUP_LOG2(x)		ilog2(roundup_pow_of_two(x))
+#define XNOR(x, y)		(!(x) == !(y))
+#define ILLEGAL_MAC(addr)	((addr) == 0xffffffffffffULL || (addr) == 0x0)
+
+
+struct mlx4_en_tx_info {
+	struct sk_buff *skb;
+	u32 nr_txbb;
+	u8 linear;
+	u8 data_offset;
+};
+
+
+#define MLX4_EN_BIT_DESC_OWN	0x80000000
+#define CTRL_SIZE	sizeof(struct mlx4_wqe_ctrl_seg)
+#define MLX4_EN_MEMTYPE_PAD	0x100
+#define DS_SIZE		sizeof(struct mlx4_wqe_data_seg)
+
+
+struct mlx4_en_tx_desc {
+	struct mlx4_wqe_ctrl_seg ctrl;
+	union {
+		struct mlx4_wqe_data_seg data; /* at least one data segment */
+		struct mlx4_wqe_lso_seg lso;
+		struct mlx4_wqe_inline_seg inl;
+	};
+};
+
+#define MLX4_EN_USE_SRQ		0x01000000
+
+struct mlx4_en_rx_alloc {
+	struct page *page;
+	u16 offset;
+};
+
+struct mlx4_en_tx_ring {
+	struct mlx4_hwq_resources wqres;
+	u32 size ; /* number of TXBBs */
+	u32 size_mask;
+	u16 stride;
+	u16 cqn;	/* index of port CQ associated with this ring */
+	u32 prod;
+	u32 cons;
+	u32 buf_size;
+	u32 doorbell_qpn;
+	void *buf;
+	u16 poll_cnt;
+	int blocked;
+	struct mlx4_en_tx_info *tx_info;
+	u8 *bounce_buf;
+	u32 last_nr_txbb;
+	struct mlx4_qp qp;
+	struct mlx4_qp_context context;
+	int qpn;
+	enum mlx4_qp_state qp_state;
+	struct mlx4_srq dummy;
+	unsigned long bytes;
+	unsigned long packets;
+	spinlock_t comp_lock;
+};
+
+struct mlx4_en_rx_desc {
+	struct mlx4_wqe_srq_next_seg next;
+	/* actual number of entries depends on rx ring stride */
+	struct mlx4_wqe_data_seg data[0];
+};
+
+struct mlx4_en_rx_ring {
+	struct mlx4_srq srq;
+	struct mlx4_hwq_resources wqres;
+	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
+	struct net_lro_mgr lro;
+	u32 size ;	/* number of Rx descs*/
+	u32 actual_size;
+	u32 size_mask;
+	u16 stride;
+	u16 log_stride;
+	u16 cqn;	/* index of port CQ associated with this ring */
+	u32 prod;
+	u32 cons;
+	u32 buf_size;
+	int need_refill;
+	int full;
+	void *buf;
+	void *rx_info;
+	unsigned long bytes;
+	unsigned long packets;
+};
+
+
+static inline int mlx4_en_can_lro(__be16 status)
+{
+	return (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4	|
+				     MLX4_CQE_STATUS_IPV4F	|
+				     MLX4_CQE_STATUS_IPV6	|
+				     MLX4_CQE_STATUS_IPV4OPT	|
+				     MLX4_CQE_STATUS_TCP	|
+				     MLX4_CQE_STATUS_UDP	|
+				     MLX4_CQE_STATUS_IPOK)) ==
+		cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+			    MLX4_CQE_STATUS_IPOK |
+			    MLX4_CQE_STATUS_TCP);
+}
+
+struct mlx4_en_cq {
+	struct mlx4_cq          mcq;
+	struct mlx4_hwq_resources wqres;
+	int                     ring;
+	spinlock_t              lock;
+	struct net_device      *dev;
+	struct napi_struct	napi;
+	/* Per-core Tx cq processing support */
+	struct timer_list timer;
+	int size;
+	int buf_size;
+	unsigned vector;
+	enum cq_type is_tx;
+	u16 moder_time;
+	u16 moder_cnt;
+	int armed;
+	struct mlx4_cqe *buf;
+#define MLX4_EN_OPCODE_ERROR	0x1e
+};
+
+struct mlx4_en_port_profile {
+	u32 flags;
+	u32 tx_ring_num;
+	u32 rx_ring_num;
+	u32 tx_ring_size;
+	u32 rx_ring_size;
+};
+
+struct mlx4_en_profile {
+	int rss_xor;
+	int num_lro;
+	u8 rss_mask;
+	u32 active_ports;
+	u32 small_pkt_int;
+	int rx_moder_cnt;
+	int rx_moder_time;
+	int auto_moder;
+	u8 rx_pause;
+	u8 rx_ppp;
+	u8 tx_pause;
+	u8 tx_ppp;
+	u8 no_reset;
+	struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_en_dev {
+	struct mlx4_dev         *dev;
+	struct pci_dev		*pdev;
+	struct mutex		state_lock;
+	struct net_device       *pndev[MLX4_MAX_PORTS + 1];
+	u32                     port_cnt;
+	bool			device_up;
+	struct mlx4_en_profile  profile;
+	u32			LSO_support;
+	struct workqueue_struct *workqueue;
+	struct device           *dma_device;
+	void __iomem            *uar_map;
+	struct mlx4_uar         priv_uar;
+	struct mlx4_mr		mr;
+	u32                     priv_pdn;
+	spinlock_t              uar_lock;
+};
+
+
+struct mlx4_en_rss_map {
+	int size;
+	int base_qpn;
+	u16 map[MAX_RSS_MAP_SIZE];
+	struct mlx4_qp qps[MAX_RSS_MAP_SIZE];
+	enum mlx4_qp_state state[MAX_RSS_MAP_SIZE];
+	struct mlx4_qp indir_qp;
+	enum mlx4_qp_state indir_state;
+};
+
+struct mlx4_en_rss_context {
+	__be32 base_qpn;
+	__be32 default_qpn;
+	u16 reserved;
+	u8 hash_fn;
+	u8 flags;
+	__be32 rss_key[10];
+};
+
+struct mlx4_en_pkt_stats {
+	unsigned long broadcast;
+	unsigned long rx_prio[8];
+	unsigned long tx_prio[8];
+#define NUM_PKT_STATS		17
+};
+
+struct mlx4_en_port_stats {
+	unsigned long lro_aggregated;
+	unsigned long lro_flushed;
+	unsigned long lro_no_desc;
+	unsigned long tso_packets;
+	unsigned long queue_stopped;
+	unsigned long wake_queue;
+	unsigned long tx_timeout;
+	unsigned long rx_alloc_failed;
+	unsigned long rx_chksum_good;
+	unsigned long rx_chksum_none;
+	unsigned long tx_chksum_offload;
+#define NUM_PORT_STATS		11
+};
+
+struct mlx4_en_perf_stats {	/* embedded in struct mlx4_en_priv as .pstats */
+	u32 tx_poll;
+	u64 tx_pktsz_avg;
+	u32 inflight_avg;
+	u16 tx_coal_avg;
+	u16 rx_coal_avg;
+	u32 napi_quota;
+#define NUM_PERF_COUNTERS		6	/* number of fields above (mixed widths); keep in sync */
+};
+
+struct mlx4_en_frag_info {	/* element type of mlx4_en_priv.frag_info[] */
+	u16 frag_size;
+	u16 frag_prefix_size;
+	u16 frag_stride;
+	u16 frag_align;
+	u16 last_offset;
+	/* NOTE(review): units and exact meaning of these fields are not shown in this header */
+};
+
+struct mlx4_en_priv {	/* the priv argument taken by most mlx4_en_* functions declared below */
+	struct mlx4_en_dev *mdev;
+	struct mlx4_en_port_profile *prof;	/* presumably points into mdev->profile.prof[] - confirm */
+	struct net_device *dev;
+	struct vlan_group *vlgrp;
+	struct net_device_stats stats;
+	struct net_device_stats ret_stats;
+	spinlock_t stats_lock;	/* NOTE(review): presumably guards the stats fields - confirm */
+
+	unsigned long last_moder_packets;
+	unsigned long last_moder_tx_packets;
+	unsigned long last_moder_bytes;
+	unsigned long last_moder_jiffies;
+	int last_moder_time;
+	u16 rx_usecs;
+	u16 rx_frames;
+	u16 tx_usecs;
+	u16 tx_frames;
+	u32 pkt_rate_low;
+	u16 rx_usecs_low;
+	u32 pkt_rate_high;
+	u16 rx_usecs_high;
+	u16 sample_interval;
+	u16 adaptive_rx_coal;
+	u32 msg_enable;
+
+	struct mlx4_hwq_resources res;
+	int link_state;
+	int last_link_state;
+	bool port_up;
+	int port;
+	int registered;
+	int allocated;
+	int stride;
+	int rx_csum;
+	u64 mac;
+	int mac_index;	/* presumably the slot returned by mlx4_register_mac() - confirm */
+	unsigned max_mtu;
+	int base_qpn;
+
+	struct mlx4_en_rss_map rss_map;
+	u16 tx_prio_map[8];	/* eight entries, like rx_prio[]/tx_prio[] in mlx4_en_pkt_stats */
+	u32 flags;	/* presumably a bit mask of the MLX4_EN_FLAG_* value below - confirm */
+#define MLX4_EN_FLAG_PROMISC	0x1
+	u32 tx_ring_num;
+	u32 rx_ring_num;
+	u32 rx_skb_size;
+	struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
+	u16 num_frags;	/* presumably the number of frag_info[] entries in use - confirm */
+	u16 log_rx_info;
+
+	struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS];
+	struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
+	struct mlx4_en_cq tx_cq[MAX_TX_RINGS];	/* same length as tx_ring[] */
+	struct mlx4_en_cq rx_cq[MAX_RX_RINGS];	/* same length as rx_ring[] */
+	struct work_struct mcast_task;
+	struct work_struct mac_task;
+	struct delayed_work refill_task;
+	struct work_struct watchdog_task;
+	struct work_struct linkstate_task;
+	struct delayed_work stats_task;
+	struct mlx4_en_perf_stats pstats;
+	struct mlx4_en_pkt_stats pkstats;
+	struct mlx4_en_port_stats port_stats;
+	struct dev_mc_list *mc_list;
+	struct mlx4_en_stat_out_mbox hw_stats;
+};
+
+
+void mlx4_en_destroy_netdev(struct net_device *dev);
+int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
+			struct mlx4_en_port_profile *prof);
+
+int mlx4_en_get_profile(struct mlx4_en_dev *mdev);
+
+int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+		      int entries, int ring, enum cq_type mode);
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+
+void mlx4_en_poll_tx_cq(unsigned long data);
+void mlx4_en_tx_irq(struct mlx4_cq *mcq);
+int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
+
+int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring,
+			   u32 size, u16 stride);
+void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring);
+int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
+			     struct mlx4_en_tx_ring *ring,
+			     int cq, int srqn);
+void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
+				struct mlx4_en_tx_ring *ring);
+
+int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
+			   struct mlx4_en_rx_ring *ring,
+			   u32 size, u16 stride);
+void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
+			     struct mlx4_en_rx_ring *ring);
+int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
+void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
+				struct mlx4_en_rx_ring *ring);
+int mlx4_en_process_rx_cq(struct net_device *dev,
+			  struct mlx4_en_cq *cq,
+			  int budget);
+int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
+void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
+			     int is_tx, int rss, int qpn, int cqn, int srqn,
+			     struct mlx4_qp_context *context);
+int mlx4_en_map_buffer(struct mlx4_buf *buf);
+void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
+
+void mlx4_en_calc_rx_buf(struct net_device *dev);
+void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv,
+				 struct mlx4_en_rss_map *rss_map,
+				 int num_entries, int num_rings);
+void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num);
+int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
+void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
+int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
+void mlx4_en_rx_refill(struct work_struct *work);
+void mlx4_en_rx_irq(struct mlx4_cq *mcq);
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, struct vlan_group *grp);
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+			   u8 promisc);
+
+int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
+
+/*
+ * Globals
+ */
+extern const struct ethtool_ops mlx4_en_ethtool_ops;
+#endif
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index d1dd5b48dbd..0caf74cae8b 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -461,7 +461,7 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
 	int err;
 
 	err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
-			       ~0, dev->caps.reserved_mrws);
+			       ~0, dev->caps.reserved_mrws, 0);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index aa616892d09..26d1a7a9e37 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -62,7 +62,7 @@ int mlx4_init_pd_table(struct mlx4_dev *dev)
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
 	return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
-				(1 << 24) - 1, dev->caps.reserved_pds);
+				(1 << 24) - 1, dev->caps.reserved_pds, 0);
 }
 
 void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@@ -100,7 +100,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev)
 
 	return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
 				dev->caps.num_uars, dev->caps.num_uars - 1,
-				max(128, dev->caps.reserved_uars));
+				max(128, dev->caps.reserved_uars), 0);
 }
 
 void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c
new file mode 100644
index 00000000000..e2fdab42c4c
--- /dev/null
+++ b/drivers/net/mlx4/port.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+
+#define MLX4_MAC_VALID		(1ull << 63)
+#define MLX4_MAC_MASK		0xffffffffffffULL
+
+#define MLX4_VLAN_VALID		(1u << 31)
+#define MLX4_VLAN_MASK		0xfff
+
+void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
+{
+	int slot = MLX4_MAX_MAC_NUM;	/* counts down over every table slot */
+
+	mutex_init(&table->mutex);
+	table->total = 0;	/* nothing registered yet */
+	table->max = 1 << dev->caps.log_num_macs;	/* capacity: 2^log_num_macs */
+	while (slot--) {
+		table->refs[slot] = 0;
+		table->entries[slot] = 0;
+	}
+}
+
+void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
+{
+	int slot = MLX4_MAX_VLAN_NUM;	/* counts down over every table slot */
+
+	mutex_init(&table->mutex);
+	table->total = 0;	/* nothing registered yet */
+	table->max = 1 << dev->caps.log_num_vlans;	/* capacity: 2^log_num_vlans */
+	while (slot--) {
+		table->refs[slot] = 0;
+		table->entries[slot] = 0;
+	}
+}
+
+static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
+				   __be64 *entries)
+{
+	/* in_mod: table selector in bits 8 and up, port number in the low byte */
+	u32 in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
+	struct mlx4_cmd_mailbox *mbox = mlx4_alloc_cmd_mailbox(dev);
+	int ret;
+
+	if (IS_ERR(mbox))
+		return PTR_ERR(mbox);
+
+	/* Copy MLX4_MAC_TABLE_SIZE bytes of @entries and issue SET_PORT. */
+	memcpy(mbox->buf, entries, MLX4_MAC_TABLE_SIZE);
+	ret = mlx4_cmd(dev, mbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	/* The mailbox is released whether or not the command succeeded. */
+	mlx4_free_cmd_mailbox(dev, mbox);
+	return ret;
+}
+
+/* Take a reference on @mac in @port's MAC table; its slot goes to *index.
+ * Returns 0, -ENOSPC when no slot is available, or the SET_PORT error code. */
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
+{
+	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+	int i, err = 0, free = -1;	/* free: first unused slot, -1 if none */
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+	mutex_lock(&table->mutex);
+	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
+		if (free < 0 && !table->refs[i]) {
+			free = i;
+			continue;
+		}
+
+		if (table->refs[i] &&
+		    mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
+			/* In-use slot already holds this MAC: just add a reference */
+			*index = i;
+			++table->refs[i];
+			goto out;
+		}
+	}
+	mlx4_dbg(dev, "Free MAC index is %d\n", free);
+
+	if (free < 0 || table->total == table->max) {
+		/* No free mac entries; free < 0 must never be used as an index */
+		err = -ENOSPC;
+		goto out;
+	}
+
+	/* Register new MAC; rolled back below if the device rejects it */
+	table->refs[free] = 1;
+	table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
+	err = mlx4_set_port_mac_table(dev, port, table->entries);
+	if (unlikely(err)) {
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
+		table->refs[free] = 0;
+		table->entries[free] = 0;
+		goto out;
+	}
+	*index = free;
+	++table->total;
+out:
+	mutex_unlock(&table->mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_register_mac);
+
+/* Drop one reference on MAC slot @index of @port; the last drop clears the
+ * slot and reprograms the table. A still-shared slot is normal (debug log). */
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
+{
+	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+
+	mutex_lock(&table->mutex);
+	if (!table->refs[index]) {
+		mlx4_warn(dev, "No MAC entry for index %d\n", index);
+	} else if (--table->refs[index]) {
+		mlx4_dbg(dev, "Have more references for index %d, "
+			 "no need to modify MAC table\n", index);
+	} else {
+		/* Last reference gone: clear the slot and push the table */
+		table->entries[index] = 0;
+		mlx4_set_port_mac_table(dev, port, table->entries);
+		--table->total;
+	}
+	mutex_unlock(&table->mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
+
+static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
+				    __be32 *entries)
+{
+	/* in_mod: table selector in bits 8 and up, port number in the low byte */
+	u32 in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
+	struct mlx4_cmd_mailbox *mbox = mlx4_alloc_cmd_mailbox(dev);
+	int ret;
+
+	if (IS_ERR(mbox))
+		return PTR_ERR(mbox);
+
+	/* Copy MLX4_VLAN_TABLE_SIZE bytes of @entries and issue SET_PORT. */
+	memcpy(mbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
+	ret = mlx4_cmd(dev, mbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	/* The mailbox is released whether or not the command succeeded. */
+	mlx4_free_cmd_mailbox(dev, mbox);
+	return ret;
+}
+
+/* Take a reference on @vlan in @port's VLAN table; its slot goes to *index.
+ * Returns 0, -ENOSPC when no slot is available, or the SET_PORT error code. */
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+	int i, err = 0, free = -1;	/* free: first unused slot, -1 if none */
+
+	mutex_lock(&table->mutex);
+	for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
+		if (free < 0 && (table->refs[i] == 0)) {
+			free = i;
+			continue;
+		}
+
+		if (table->refs[i] &&
+		    (vlan == (MLX4_VLAN_MASK &
+			      be32_to_cpu(table->entries[i])))) {
+			/* Vlan already registered, increase reference count */
+			*index = i;
+			++table->refs[i];
+			goto out;
+		}
+	}
+
+	if (free < 0 || table->total == table->max) {
+		/* No free vlan entries; free < 0 must never be used as an index */
+		err = -ENOSPC;
+		goto out;
+	}
+
+	/* Register new vlan; rolled back below if the device rejects it */
+	table->refs[free] = 1;
+	table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
+	err = mlx4_set_port_vlan_table(dev, port, table->entries);
+	if (unlikely(err)) {
+		mlx4_warn(dev, "Failed adding vlan: %u\n", vlan);
+		table->refs[free] = 0;
+		table->entries[free] = 0;
+		goto out;
+	}
+
+	*index = free;
+	++table->total;
+out:
+	mutex_unlock(&table->mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_register_vlan);
+
+/* Drop one reference on VLAN slot @index of @port; the last drop clears the
+ * slot and reprograms the table. Slots below MLX4_VLAN_REGULAR are refused. */
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+{
+	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+
+	if (index < MLX4_VLAN_REGULAR) {
+		mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
+		return;
+	}
+
+	mutex_lock(&table->mutex);
+	if (!table->refs[index]) {
+		mlx4_warn(dev, "No vlan entry for index %d\n", index);
+	} else if (--table->refs[index]) {
+		mlx4_dbg(dev, "Have more references for index %d, "
+			 "no need to modify vlan table\n", index);
+	} else {
+		/* Last reference gone: clear the slot and push the table */
+		table->entries[index] = 0;
+		mlx4_set_port_vlan_table(dev, port, table->entries);
+		--table->total;
+	}
+	mutex_unlock(&table->mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
+{
+	u8 is_eth = dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
+	struct mlx4_cmd_mailbox *mbox = mlx4_alloc_cmd_mailbox(dev);
+	int ret;
+
+	if (IS_ERR(mbox))
+		return PTR_ERR(mbox);
+
+	/* 256 zeroed input bytes; Ethernet ports also get byte 3 and bit 15 of
+	 * the big-endian 16-bit words at byte offsets 8 and 12 filled in. */
+	memset(mbox->buf, 0, 256);
+	if (is_eth) {
+		((u8 *) mbox->buf)[3] = 6;
+		((__be16 *) mbox->buf)[4] = cpu_to_be16(1 << 15);
+		((__be16 *) mbox->buf)[6] = cpu_to_be16(1 << 15);
+	}
+	ret = mlx4_cmd(dev, mbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+	mlx4_free_cmd_mailbox(dev, mbox);
+	return ret;
+}
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index c49a86044bf..1c565ef8d17 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -147,19 +147,42 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_modify);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp)
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
+{
+	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+	int first;
+
+	/* Take @cnt QP numbers from the QP bitmap; *base is set on success only */
+	first = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align);
+	if (first != -1) {
+		*base = first;
+		return 0;
+	}
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
+
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+
+	/* Ranges starting below sqp_start + 8 are never returned to the bitmap;
+	 * mlx4_init_qp_table() passes that same value to mlx4_bitmap_init(). */
+	if (base_qpn >= dev->caps.sqp_start + 8)
+		mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	if (sqpn)
-		qp->qpn = sqpn;
-	else {
-		qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap);
-		if (qp->qpn == -1)
-			return -ENOMEM;
-	}
+	if (!qpn)
+		return -EINVAL;
+
+	qp->qpn = qpn;
 
 	err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
 	if (err)
@@ -208,9 +231,6 @@ err_put_qp:
 	mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
 
 err_out:
-	if (!sqpn)
-		mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
@@ -239,9 +259,6 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
 	mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
 	mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
 	mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
-
-	if (qp->qpn >= dev->caps.sqp_start + 8)
-		mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_free);
 
@@ -255,6 +272,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 {
 	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
 	int err;
+	int reserved_from_top = 0;
 
 	spin_lock_init(&qp_table->lock);
 	INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
@@ -264,9 +282,40 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	 * block of special QPs must be aligned to a multiple of 8, so
 	 * round up.
 	 */
-	dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8);
+	dev->caps.sqp_start =
+		ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
+
+	{
+		int sort[MLX4_NUM_QP_REGION];
+		int i, j, tmp;
+		int last_base = dev->caps.num_qps;
+
+		for (i = 1; i < MLX4_NUM_QP_REGION; ++i)
+			sort[i] = i;
+
+		for (i = MLX4_NUM_QP_REGION; i > 0; --i) {
+			for (j = 2; j < i; ++j) {
+				if (dev->caps.reserved_qps_cnt[sort[j]] >
+				    dev->caps.reserved_qps_cnt[sort[j - 1]]) {
+					tmp             = sort[j];
+					sort[j]         = sort[j - 1];
+					sort[j - 1]     = tmp;
+				}
+			}
+		}
+
+		for (i = 1; i < MLX4_NUM_QP_REGION; ++i) {
+			last_base -= dev->caps.reserved_qps_cnt[sort[i]];
+			dev->caps.reserved_qps_base[sort[i]] = last_base;
+			reserved_from_top +=
+				dev->caps.reserved_qps_cnt[sort[i]];
+		}
+
+	}
+
 	err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
-			       (1 << 24) - 1, dev->caps.sqp_start + 8);
+			       (1 << 23) - 1, dev->caps.sqp_start + 8,
+			       reserved_from_top);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index 533eb6db24b..fe9f218691f 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -245,7 +245,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
 	INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
 
 	err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
-			       dev->caps.num_srqs - 1, dev->caps.reserved_srqs);
+			       dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
 	if (err)
 		return err;
 
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index b39d1cc1ef0..a24bb68887a 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -1205,11 +1205,12 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		devno = 0;
 
 	ndev->dma = -1;
-	ndev->irq = platform_get_irq(pdev, 0);
-	if (ndev->irq < 0) {
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
 		ret = -ENODEV;
 		goto out_release;
 	}
+	ndev->irq = ret;
 
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 38b90e7a7ed..7914867110e 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -168,7 +168,7 @@ static int get_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 			netif_device_detach(pegasus->net);
 		if (netif_msg_drv(pegasus) && printk_ratelimit())
 			dev_err(&pegasus->intf->dev, "%s, status %d\n",
-					__FUNCTION__, ret);
+					__func__, ret);
 		goto out;
 	}
 
@@ -192,7 +192,7 @@ static int set_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 	if (!buffer) {
 		if (netif_msg_drv(pegasus))
 			dev_warn(&pegasus->intf->dev, "out of memory in %s\n",
-					__FUNCTION__);
+					__func__);
 		return -ENOMEM;
 	}
 	memcpy(buffer, data, size);
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index f972fef87c9..ee51b6a5e60 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -318,7 +318,7 @@ sbni_pci_probe( struct net_device  *dev )
 				continue;
 		}
 
-		if( pci_irq_line <= 0  ||  pci_irq_line >= NR_IRQS )
+		if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
 			printk( KERN_WARNING "  WARNING: The PCI BIOS assigned "
 				"this PCI card to IRQ %d, which is unlikely "
 				"to work!.\n"
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f3a50..dcc1e9958d2 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -66,15 +66,8 @@
 #undef DEBUG_CCIO_RUN_SG
 
 #ifdef CONFIG_PROC_FS
-/*
- * CCIO_SEARCH_TIME can help measure how fast the bitmap search is.
- * impacts performance though - ditch it if you don't use it.
- */
-#define CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
-#else
-#undef CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
+/* depends on proc fs support. But costs CPU performance. */
+#undef CCIO_COLLECT_STATS
 #endif
 
 #include <linux/proc_fs.h>
@@ -239,12 +232,10 @@ struct ioc {
 	u32 res_size;		    	/* size of resource map in bytes */
 	spinlock_t res_lock;
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 #define CCIO_SEARCH_SAMPLE 0x100
 	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
 	unsigned long avg_idx;		  /* current index into avg_search */
-#endif
-#ifdef CCIO_MAP_STATS
 	unsigned long used_pages;
 	unsigned long msingle_calls;
 	unsigned long msingle_pages;
@@ -351,7 +342,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 	unsigned int pages_needed = size >> IOVP_SHIFT;
 	unsigned int res_idx;
 	unsigned long boundary_size;
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	unsigned long cr_start = mfctl(16);
 #endif
 	
@@ -406,7 +397,7 @@ resource_found:
 	DBG_RES("%s() res_idx %d res_hint: %d\n",
 		__func__, res_idx, ioc->res_hint);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	{
 		unsigned long cr_end = mfctl(16);
 		unsigned long tmp = cr_end - cr_start;
@@ -416,7 +407,7 @@ resource_found:
 	ioc->avg_search[ioc->avg_idx++] = cr_start;
 	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages += pages_needed;
 #endif
 	/* 
@@ -452,7 +443,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
 	DBG_RES("%s():  res_idx: %d pages_mapped %d\n", 
 		__func__, res_idx, pages_mapped);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages -= pages_mapped;
 #endif
 
@@ -764,7 +755,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
 	size = ALIGN(size + offset, IOVP_SIZE);
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msingle_calls++;
 	ioc->msingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -828,7 +819,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
 
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usingle_calls++;
 	ioc->usingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -894,7 +885,7 @@ ccio_free_consistent(struct device *dev, size_t size, void *cpu_addr,
 */
 #define PIDE_FLAG 0x80000000UL
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 #define IOMMU_MAP_STATS
 #endif
 #include "iommu-helpers.h"
@@ -938,7 +929,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msg_calls++;
 #endif
 
@@ -997,13 +988,13 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	DBG_RUN_SG("%s() START %d entries,  %08lx,%x\n",
 		__func__, nents, sg_virt_addr(sglist), sglist->length);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usg_calls++;
 #endif
 
 	while(sg_dma_len(sglist) && nents--) {
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
 #endif
 		ccio_unmap_single(dev, sg_dma_address(sglist),
@@ -1048,7 +1039,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "IO PDIR size    : %d bytes (%d entries)\n",
 			       total_pages * 8, total_pages);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "IO PDIR entries : %ld free  %ld used (%d%%)\n",
 				  total_pages - ioc->used_pages, ioc->used_pages,
 				  (int)(ioc->used_pages * 100 / total_pages));
@@ -1057,7 +1048,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 
 				  ioc->res_size, total_pages);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 		min = max = ioc->avg_search[0];
 		for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
 			avg += ioc->avg_search[j];
@@ -1070,7 +1061,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
 				  min, avg, max);
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
 				  ioc->msingle_calls, ioc->msingle_pages,
 				  (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1088,7 +1079,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "pci_unmap_sg()  : %8ld calls  %8ld pages (avg %d/1000)\n\n\n",
 				  ioc->usg_calls, ioc->usg_pages,
 				  (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
-#endif	/* CCIO_MAP_STATS */
+#endif	/* CCIO_COLLECT_STATS */
 
 		ioc = ioc->next;
 	}
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index fd56128525d..3bc54b30c3a 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -298,7 +298,8 @@ struct pci_port_ops dino_port_ops = {
 
 static void dino_disable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
 	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq);
@@ -310,7 +311,8 @@ static void dino_disable_irq(unsigned int irq)
 
 static void dino_enable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 	u32 tmp;
 
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 771cef59254..7891db50c48 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -346,10 +346,10 @@ static int __init eisa_probe(struct parisc_device *dev)
 	}
 	
 	/* Reserve IRQ2 */
-	irq_desc[2].action = &irq2_action;
+	irq_to_desc(2)->action = &irq2_action;
 	
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &eisa_interrupt_type;
+		irq_to_desc(i)->chip = &eisa_interrupt_type;
 	}
 	
 	EISA_bus = 1;
diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
index f7d088b897e..e76db9e4d50 100644
--- a/drivers/parisc/gsc.c
+++ b/drivers/parisc/gsc.c
@@ -108,7 +108,8 @@ int gsc_find_local_irq(unsigned int irq, int *global_irqs, int limit)
 
 static void gsc_asic_disable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -123,7 +124,8 @@ static void gsc_asic_disable_irq(unsigned int irq)
 
 static void gsc_asic_enable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -159,12 +161,14 @@ static struct hw_interrupt_type gsc_asic_interrupt_type = {
 int gsc_assign_irq(struct hw_interrupt_type *type, void *data)
 {
 	static int irq = GSC_IRQ_BASE;
+	struct irq_desc *desc;
 
 	if (irq > GSC_IRQ_MAX)
 		return NO_IRQ;
 
-	irq_desc[irq].chip = type;
-	irq_desc[irq].chip_data = data;
+	desc = irq_to_desc(irq);
+	desc->chip = type;
+	desc->chip_data = data;
 	return irq++;
 }
 
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 6fb3f7979f2..7beffcab274 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -619,7 +619,9 @@ iosapic_set_irt_data( struct vector_info *vi, u32 *dp0, u32 *dp1)
 
 static struct vector_info *iosapic_get_vector(unsigned int irq)
 {
-	return irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc->chip_data;
 }
 
 static void iosapic_disable_irq(unsigned int irq)
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 1e8d2d17f04..1e93c837514 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -363,7 +363,9 @@ int superio_fixup_irq(struct pci_dev *pcidev)
 #endif
 
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &superio_interrupt_type;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->chip = &superio_interrupt_type;
 	}
 
 	/*
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 529d9d7727b..999cc4088b5 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -151,6 +151,13 @@ void pci_bus_add_devices(struct pci_bus *bus)
 			if (retval)
 				dev_err(&dev->dev, "Error creating cpuaffinity"
 					" file, continuing...\n");
+
+			retval = device_create_file(&child_bus->dev,
+						&dev_attr_cpulistaffinity);
+			if (retval)
+				dev_err(&dev->dev,
+					"Error creating cpulistaffinity"
+					" file, continuing...\n");
 		}
 	}
 }
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index e842e756308..8b29c307f1a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -193,7 +193,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	static int include_all;
-	int ret;
+	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
@@ -212,7 +212,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 		include_all = 1;
 	}
 
-	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
 	}
@@ -289,6 +289,24 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	}
 }
 
+/**
+ * dmar_table_detect - look up the ACPI DMAR table and record it in dmar_tbl
+ *
+ * Returns 1 when the table was found and dmar_tbl is non-NULL, 0 otherwise.
+ */
+static int __init dmar_table_detect(void)
+{
+	acpi_status rc;
+
+	rc = acpi_get_table(ACPI_SIG_DMAR, 0,
+			    (struct acpi_table_header **)&dmar_tbl);
+	if (!ACPI_SUCCESS(rc))
+		return 0;
+	if (dmar_tbl)
+		return 1;
+	printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+	return 0;
+}
 
 /**
  * parse_dmar_table - parses the DMA reporting table
@@ -300,6 +318,12 @@ parse_dmar_table(void)
 	struct acpi_dmar_header *entry_header;
 	int ret = 0;
 
+	/*
+	 * Do it again, earlier dmar_tbl mapping could be mapped with
+	 * fixed map.
+	 */
+	dmar_table_detect();
+
 	dmar = (struct acpi_table_dmar *)dmar_tbl;
 	if (!dmar)
 		return -ENODEV;
@@ -373,10 +397,10 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
 
 int __init dmar_dev_scope_init(void)
 {
-	struct dmar_drhd_unit *drhd;
+	struct dmar_drhd_unit *drhd, *drhd_n;
 	int ret = -ENODEV;
 
-	for_each_drhd_unit(drhd) {
+	list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
 		ret = dmar_parse_dev(drhd);
 		if (ret)
 			return ret;
@@ -384,8 +408,8 @@ int __init dmar_dev_scope_init(void)
 
 #ifdef CONFIG_DMAR
 	{
-		struct dmar_rmrr_unit *rmrr;
-		for_each_rmrr_units(rmrr) {
+		struct dmar_rmrr_unit *rmrr, *rmrr_n;
+		list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 			ret = rmrr_parse_dev(rmrr);
 			if (ret)
 				return ret;
@@ -430,30 +454,11 @@ int __init dmar_table_init(void)
 	return 0;
 }
 
-/**
- * early_dmar_detect - checks to see if the platform supports DMAR devices
- */
-int __init early_dmar_detect(void)
-{
-	acpi_status status = AE_OK;
-
-	/* if we could find DMAR table, then there are DMAR devices */
-	status = acpi_get_table(ACPI_SIG_DMAR, 0,
-				(struct acpi_table_header **)&dmar_tbl);
-
-	if (ACPI_SUCCESS(status) && !dmar_tbl) {
-		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
-		status = AE_NOT_FOUND;
-	}
-
-	return (ACPI_SUCCESS(status) ? 1 : 0);
-}
-
 void __init detect_intel_iommu(void)
 {
 	int ret;
 
-	ret = early_dmar_detect();
+	ret = dmar_table_detect();
 
 #ifdef CONFIG_DMAR
 	{
@@ -479,14 +484,16 @@ void __init detect_intel_iommu(void)
 			       " x2apic support\n");
 
 			dmar_disabled = 1;
-			return;
+			goto end;
 		}
 
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 		    !dmar_disabled)
 			iommu_detected = 1;
 	}
+end:
 #endif
+	dmar_tbl = NULL;
 }
 
 
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 7d27631e6e6..8cfd1c4926c 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -123,10 +123,8 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
 static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
-	struct list_head *ptr1;
 	
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
 		debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -146,10 +144,8 @@ static void __init print_bus_info (void)
 static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("print_lo_info ----\n");	
-	list_for_each (ptr1, &rio_lo_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -163,10 +159,8 @@ static void print_lo_info (void)
 static void print_vg_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &rio_vg_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -180,10 +174,8 @@ static void print_vg_info (void)
 static void __init print_ebda_pci_rsrc (void)
 {
 	struct ebda_pci_rsrc *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_ebda_pci_rsrc_head) {
-		ptr = list_entry (ptr1, struct ebda_pci_rsrc, ebda_pci_rsrc_list);
+	list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
 		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
@@ -192,10 +184,8 @@ static void __init print_ebda_pci_rsrc (void)
 static void __init print_ibm_slot (void)
 {
 	struct slot *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_slot_head) {
-		ptr = list_entry (ptr1, struct slot, ibm_slot_list);
+	list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
 		debug ("%s - slot_number: %x\n", __func__, ptr->number);
 	}
 }
@@ -203,10 +193,8 @@ static void __init print_ibm_slot (void)
 static void __init print_opt_vg (void)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
 		debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
 		debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -217,13 +205,9 @@ static void __init print_opt_vg (void)
 static void __init print_ebda_hpc (void)
 {
 	struct controller *hpc_ptr;
-	struct list_head *ptr1;
 	u16 index;
 
-	list_for_each (ptr1, &ebda_hpc_head) {
-
-		hpc_ptr = list_entry (ptr1, struct controller, ebda_hpc_list); 
-
+	list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
 			debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
 			debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -460,9 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -473,10 +455,8 @@ static int __init combine_wpg_for_chassis (void)
 {
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_vg_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
 			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -497,14 +477,12 @@ static int __init combine_wpg_for_chassis (void)
 }	
 
 /*
- * reorgnizing linked list of expansion box	 
+ * reorganizing linked list of expansion box
  */
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
 	struct opt_rio_lo *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_lo_head) {
-		ptr = list_entry (ptr1, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -515,10 +493,8 @@ static int combine_wpg_for_expansion (void)
 {
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_lo_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
 			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -550,20 +526,17 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 {
 	struct opt_rio *opt_vg_ptr = NULL;
 	struct opt_rio_lo *opt_lo_ptr = NULL;
-	struct list_head *ptr = NULL;
 	int rc = 0;
 
 	if (!var) {
-		list_for_each (ptr, &opt_vg_head) {
-			opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+		list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
 				rc = -ENODEV;
 				break;
 			}
 		}
 	} else {
-		list_for_each (ptr, &opt_lo_head) {
-			opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+		list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
@@ -576,10 +549,8 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_lo_head) {
-		opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
 		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
 			return opt_lo_ptr;
@@ -590,10 +561,8 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 static struct opt_rio * find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_vg_head) {
-		opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+	list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 		//check to see if this slot_num belongs to chassis 
 		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
 			return opt_vg_ptr;
@@ -607,11 +576,9 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 static u8 calculate_first_slot (u8 slot_num)
 {
 	u8 first_slot = 1;
-	struct list_head * list;
 	struct slot * slot_cur;
 	
-	list_for_each (list, &ibmphp_slot_head) {
-		slot_cur = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
 			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
 				first_slot = slot_cur->ctrl->ending_slot_num;
@@ -767,7 +734,6 @@ static int __init ebda_rsrc_controller (void)
 	struct bus_info *bus_info_ptr1, *bus_info_ptr2;
 	int rc;
 	struct slot *tmp_slot;
-	struct list_head *list;
 
 	addr = hpc_list_ptr->phys_addr;
 	for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
@@ -997,9 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
 	}			/* each hpc  */
 
-	list_for_each (list, &ibmphp_slot_head) {
-		tmp_slot = list_entry (list, struct slot, ibm_slot_list);
-
+	list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
 		pci_hp_register(tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1101,10 +1065,8 @@ u16 ibmphp_get_total_controllers (void)
 struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
 	struct slot *slot;
-	struct list_head *list;
 
-	list_for_each (list, &ibmphp_slot_head) {
-		slot = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot->number == physical_num)
 			return slot;
 	}
@@ -1120,10 +1082,8 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list); 
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num) 
 			 return ptr;
 	}
@@ -1136,10 +1096,8 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 int ibmphp_get_bus_index (u8 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num)  
 			return ptr->index;
 	}
@@ -1212,11 +1170,9 @@ static struct pci_driver ibmphp_driver = {
 int ibmphp_register_pci (void)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 	int rc = 0;
 
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			rc = pci_register_driver(&ibmphp_driver);
 			break;
@@ -1227,12 +1183,10 @@ int ibmphp_register_pci (void)
 static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 
 	debug ("inside ibmphp_probe\n");
 	
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
 				ctrl->ctrl_dev = dev;
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 5f85b1b120e..2e6c4474644 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,13 +102,13 @@ static int get_##name (struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (try_module_get(ops->owner)) {				\
-		if (ops->get_##name)					\
-			retval = ops->get_##name(slot, value);		\
-		else							\
-			*value = slot->info->name;			\
-		module_put(ops->owner);					\
-	}								\
+	if (!try_module_get(ops->owner))				\
+		return -ENODEV;						\
+	if (ops->get_##name)						\
+		retval = ops->get_##name(slot, value);			\
+	else								\
+		*value = slot->info->name;				\
+	module_put(ops->owner);						\
 	return retval;							\
 }
 
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9e6cec67e1c..c367978bd7f 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -57,6 +57,19 @@ extern struct workqueue_struct *pciehp_wq;
 #define warn(format, arg...)						\
 	printk(KERN_WARNING "%s: " format, MY_NAME , ## arg)
 
+#define ctrl_dbg(ctrl, format, arg...)	/* KERN_DEBUG, if pciehp_debug set */ \
+	do {								\
+		if (pciehp_debug)					\
+			dev_printk(KERN_DEBUG, &(ctrl)->pcie->device,	\
+					format, ## arg);		\
+	} while (0)
+#define ctrl_err(ctrl, format, arg...)	/* dev_err on ctrl's pcie device */ \
+	dev_err(&(ctrl)->pcie->device, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)	/* dev_info on ctrl's pcie device */ \
+	dev_info(&(ctrl)->pcie->device, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)	/* dev_warn on ctrl's pcie device */ \
+	dev_warn(&(ctrl)->pcie->device, format, ## arg)
+
 #define SLOT_NAME_SIZE 10
 struct slot {
 	u8 bus;
@@ -87,6 +100,7 @@ struct controller {
 	int num_slots;			/* Number of slots on ctlr */
 	int slot_num_inc;		/* 1 or -1 */
 	struct pci_dev *pci_dev;
+	struct pcie_device *pcie;	/* PCI Express port service */
 	struct list_head slot_list;
 	struct hpc_ops *hpc_ops;
 	wait_queue_head_t queue;	/* sleep & wake process */
@@ -170,7 +184,7 @@ static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
 			return slot;
 	}
 
-	err("%s: slot (device=0x%x) not found\n", __func__, device);
+	ctrl_err(ctrl, "%s: slot (device=0x%x) not found\n", __func__, device);
 	return NULL;
 }
 
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4fd5355bc3b..c748a19db89 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -144,9 +144,10 @@ set_lock_exit:
  * sysfs interface which allows the user to toggle the Electro Mechanical
  * Interlock.  Valid values are either 0 or 1.  0 == unlock, 1 == lock
  */
-static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
-		size_t count)
+static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
+		const char *buf, size_t count)
 {
+	struct slot *slot = hotplug_slot->private;
 	unsigned long llock;
 	u8 lock;
 	int retval = 0;
@@ -157,10 +158,11 @@ static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
 	switch (lock) {
 		case 0:
 		case 1:
-			retval = set_lock_status(slot, lock);
+			retval = set_lock_status(hotplug_slot, lock);
 			break;
 		default:
-			err ("%d is an invalid lock value\n", lock);
+			ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
+				 lock);
 			retval = -EINVAL;
 	}
 	if (retval)
@@ -180,7 +182,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
  */
 static void release_slot(struct hotplug_slot *hotplug_slot)
 {
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	struct slot *slot = hotplug_slot->private;
+
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	kfree(hotplug_slot->info);
 	kfree(hotplug_slot);
@@ -215,9 +220,9 @@ static int init_slots(struct controller *ctrl)
 		get_adapter_status(hotplug_slot, &info->adapter_status);
 		slot->hotplug_slot = hotplug_slot;
 
-		dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
-		    "slot_device_offset=%x\n", slot->bus, slot->device,
-		    slot->hp_slot, slot->number, ctrl->slot_device_offset);
+		ctrl_dbg(ctrl, "Registering bus=%x dev=%x hp_slot=%x sun=%x "
+			 "slot_device_offset=%x\n", slot->bus, slot->device,
+			 slot->hp_slot, slot->number, ctrl->slot_device_offset);
 duplicate_name:
 		retval = pci_hp_register(hotplug_slot,
 					 ctrl->pci_dev->subordinate,
@@ -233,9 +238,11 @@ duplicate_name:
 				if (len < SLOT_NAME_SIZE)
 					goto duplicate_name;
 				else
-					err("duplicate slot name overflow\n");
+					ctrl_err(ctrl, "duplicate slot name "
+						 "overflow\n");
 			}
-			err("pci_hp_register failed with error %d\n", retval);
+			ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
+				 retval);
 			goto error_info;
 		}
 		/* create additional sysfs entries */
@@ -244,7 +251,8 @@ duplicate_name:
 				&hotplug_slot_attr_lock.attr);
 			if (retval) {
 				pci_hp_deregister(hotplug_slot);
-				err("cannot create additional sysfs entries\n");
+				ctrl_err(ctrl, "cannot create additional sysfs "
+					 "entries\n");
 				goto error_info;
 			}
 		}
@@ -278,7 +286,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	hotplug_slot->info->attention_status = status;
 
@@ -293,7 +302,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_enable_slot(slot);
 }
@@ -303,7 +313,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_disable_slot(slot);
 }
@@ -313,7 +324,8 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_power_status(slot, value);
 	if (retval < 0)
@@ -327,7 +339,8 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_attention_status(slot, value);
 	if (retval < 0)
@@ -341,7 +354,8 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_latch_status(slot, value);
 	if (retval < 0)
@@ -355,7 +369,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_adapter_status(slot, value);
 	if (retval < 0)
@@ -370,7 +385,8 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_max_bus_speed(slot, value);
 	if (retval < 0)
@@ -384,7 +400,8 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
 	if (retval < 0)
@@ -402,14 +419,15 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	struct pci_dev *pdev = dev->port;
 
 	if (pciehp_force)
-		dbg("Bypassing BIOS check for pciehp use on %s\n",
-		    pci_name(pdev));
+		dev_info(&dev->device,
+			 "Bypassing BIOS check for pciehp use on %s\n",
+			 pci_name(pdev));
 	else if (pciehp_get_hp_hw_control_from_firmware(pdev))
 		goto err_out_none;
 
 	ctrl = pcie_init(dev);
 	if (!ctrl) {
-		dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME);
+		dev_err(&dev->device, "controller initialization failed\n");
 		goto err_out_none;
 	}
 	set_service_data(dev, ctrl);
@@ -418,11 +436,10 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	rc = init_slots(ctrl);
 	if (rc) {
 		if (rc == -EBUSY)
-			warn("%s: slot already registered by another "
-				"hotplug driver\n", PCIE_MODULE_NAME);
+			ctrl_warn(ctrl, "slot already registered by another "
+				  "hotplug driver\n");
 		else
-			err("%s: slot initialization failed\n",
-				PCIE_MODULE_NAME);
+			ctrl_err(ctrl, "slot initialization failed\n");
 		goto err_out_release_ctlr;
 	}
 
@@ -461,13 +478,13 @@ static void pciehp_remove (struct pcie_device *dev)
 #ifdef CONFIG_PM
 static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	return 0;
 }
 
 static int pciehp_resume (struct pcie_device *dev)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	if (pciehp_force) {
 		struct controller *ctrl = get_service_data(dev);
 		struct slot *t_slot;
@@ -497,10 +514,9 @@ static struct pcie_port_service_id port_pci_ids[] = { {
 	.driver_data =	0,
 	}, { /* end: all zeroes */ }
 };
-static const char device_name[] = "hpdriver";
 
 static struct pcie_port_service_driver hpdriver_portdrv = {
-	.name		= (char *)device_name,
+	.name		= PCIE_MODULE_NAME,
 	.id_table	= &port_pci_ids[0],
 
 	.probe		= pciehp_probe,
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 96a5d55a498..acb7f9efd18 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -58,14 +58,15 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 u8 pciehp_handle_attention_button(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Attention Button Change */
-	dbg("pciehp:  Attention button interrupt received.\n");
+	ctrl_dbg(ctrl, "Attention button interrupt received.\n");
 
 	/*
 	 *  Button pressed - See if need to TAKE ACTION!!!
 	 */
-	info("Button pressed on Slot(%s)\n", p_slot->name);
+	ctrl_info(ctrl, "Button pressed on Slot(%s)\n", p_slot->name);
 	event_type = INT_BUTTON_PRESS;
 
 	queue_interrupt_event(p_slot, event_type);
@@ -77,22 +78,23 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
 {
 	u8 getstatus;
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Switch Change */
-	dbg("pciehp:  Switch interrupt received.\n");
+	ctrl_dbg(ctrl, "Switch interrupt received.\n");
 
 	p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 	if (getstatus) {
 		/*
 		 * Switch opened
 		 */
-		info("Latch open on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch open on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_OPEN;
 	} else {
 		/*
 		 *  Switch closed
 		 */
-		info("Latch close on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch close on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_CLOSE;
 	}
 
@@ -105,9 +107,10 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 {
 	u32 event_type;
 	u8 presence_save;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Presence Change */
-	dbg("pciehp:  Presence/Notify input change.\n");
+	ctrl_dbg(ctrl, "Presence/Notify input change.\n");
 
 	/* Switch is open, assume a presence change
 	 * Save the presence state
@@ -117,13 +120,13 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 		/*
 		 * Card Present
 		 */
-		info("Card present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_ON;
 	} else {
 		/*
 		 * Not Present
 		 */
-		info("Card not present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card not present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_OFF;
 	}
 
@@ -135,23 +138,25 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 u8 pciehp_handle_power_fault(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* power fault */
-	dbg("pciehp:  Power fault interrupt received.\n");
+	ctrl_dbg(ctrl, "Power fault interrupt received.\n");
 
 	if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
 		/*
 		 * power fault Cleared
 		 */
-		info("Power fault cleared on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
+			  p_slot->name);
 		event_type = INT_POWER_FAULT_CLEAR;
 	} else {
 		/*
 		 *   power fault
 		 */
-		info("Power fault on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault on Slot(%s)\n", p_slot->name);
 		event_type = INT_POWER_FAULT;
-		info("power fault bit %x set\n", 0);
+		ctrl_info(ctrl, "power fault bit %x set\n", 0);
 	}
 
 	queue_interrupt_event(p_slot, event_type);
@@ -168,8 +173,9 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 	/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
 	if (POWER_CTRL(ctrl)) {
 		if (pslot->hpc_ops->power_off_slot(pslot)) {
-			err("%s: Issue of Slot Power Off command failed\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Issue of Slot Power Off command failed\n",
+				 __func__);
 			return;
 		}
 	}
@@ -186,8 +192,8 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 
 	if (ATTN_LED(ctrl)) {
 		if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
-			err("%s: Issue of Set Attention Led command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Set Attention "
+				 "Led command failed\n", __func__);
 			return;
 		}
 	}
@@ -205,9 +211,9 @@ static int board_added(struct slot *p_slot)
 	int retval = 0;
 	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
-			__func__, p_slot->device,
-			ctrl->slot_device_offset, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
+		 __func__, p_slot->device, ctrl->slot_device_offset,
+		 p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* Power on slot */
@@ -225,22 +231,22 @@ static int board_added(struct slot *p_slot)
 	/* Check link training status */
 	retval = p_slot->hpc_ops->check_lnk_status(ctrl);
 	if (retval) {
-		err("%s: Failed to check link status\n", __func__);
+		ctrl_err(ctrl, "%s: Failed to check link status\n", __func__);
 		set_slot_off(ctrl, p_slot);
 		return retval;
 	}
 
 	/* Check for a power fault */
 	if (p_slot->hpc_ops->query_power_fault(p_slot)) {
-		dbg("%s: power fault detected\n", __func__);
+		ctrl_dbg(ctrl, "%s: power fault detected\n", __func__);
 		retval = POWER_FAILURE;
 		goto err_exit;
 	}
 
 	retval = pciehp_configure_device(p_slot);
 	if (retval) {
-		err("Cannot add device 0x%x:%x\n", p_slot->bus,
-		    p_slot->device);
+		ctrl_err(ctrl, "Cannot add device 0x%x:%x\n",
+			 p_slot->bus, p_slot->device);
 		goto err_exit;
 	}
 
@@ -272,14 +278,14 @@ static int remove_board(struct slot *p_slot)
 	if (retval)
 		return retval;
 
-	dbg("In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* power off slot */
 		retval = p_slot->hpc_ops->power_off_slot(p_slot);
 		if (retval) {
-			err("%s: Issue of Slot Disable command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Slot Disable command "
+				 "failed\n", __func__);
 			return retval;
 		}
 	}
@@ -320,8 +326,8 @@ static void pciehp_power_thread(struct work_struct *work)
 	switch (p_slot->state) {
 	case POWEROFF_STATE:
 		mutex_unlock(&p_slot->lock);
-		dbg("%s: disabling bus:device(%x:%x)\n",
-		    __func__, p_slot->bus, p_slot->device);
+		ctrl_dbg(p_slot->ctrl, "%s: disabling bus:device(%x:%x)\n",
+			 __func__, p_slot->bus, p_slot->device);
 		pciehp_disable_slot(p_slot);
 		mutex_lock(&p_slot->lock);
 		p_slot->state = STATIC_STATE;
@@ -349,7 +355,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -403,12 +410,14 @@ static void handle_button_press_event(struct slot *p_slot)
 		p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (getstatus) {
 			p_slot->state = BLINKINGOFF_STATE;
-			info("PCI slot #%s - powering off due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering off due to button "
+				  "press.\n", p_slot->name);
 		} else {
 			p_slot->state = BLINKINGON_STATE;
-			info("PCI slot #%s - powering on due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering on due to button "
+				  "press.\n", p_slot->name);
 		}
 		/* blink green LED and turn off amber */
 		if (PWR_LED(ctrl))
@@ -425,8 +434,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * press the attention again before the 5 sec. limit
 		 * expires to cancel hot-add or hot-remove
 		 */
-		info("Button cancel on Slot(%s)\n", p_slot->name);
-		dbg("%s: button cancel\n", __func__);
+		ctrl_info(ctrl, "Button cancel on Slot(%s)\n", p_slot->name);
+		ctrl_dbg(ctrl, "%s: button cancel\n", __func__);
 		cancel_delayed_work(&p_slot->work);
 		if (p_slot->state == BLINKINGOFF_STATE) {
 			if (PWR_LED(ctrl))
@@ -437,8 +446,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		}
 		if (ATTN_LED(ctrl))
 			p_slot->hpc_ops->set_attention_status(p_slot, 0);
-		info("PCI slot #%s - action canceled due to button press\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "PCI slot #%s - action canceled "
+			  "due to button press\n", p_slot->name);
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
@@ -448,11 +457,11 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * this means that the previous attention button action
 		 * to hot-add or hot-remove is undergoing
 		 */
-		info("Button ignore on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Button ignore on Slot(%s)\n", p_slot->name);
 		update_slot_info(p_slot);
 		break;
 	default:
-		warn("Not a valid state\n");
+		ctrl_warn(ctrl, "Not a valid state\n");
 		break;
 	}
 }
@@ -467,7 +476,8 @@ static void handle_surprise_event(struct slot *p_slot)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -505,7 +515,7 @@ static void interrupt_event_handler(struct work_struct *work)
 	case INT_PRESENCE_OFF:
 		if (!HP_SUPR_RM(ctrl))
 			break;
-		dbg("Surprise Removal\n");
+		ctrl_dbg(ctrl, "Surprise Removal\n");
 		update_slot_info(p_slot);
 		handle_surprise_event(p_slot);
 		break;
@@ -522,22 +532,23 @@ int pciehp_enable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int rc;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Check to see if (latch closed, card present, power off) */
 	mutex_lock(&p_slot->ctrl->crit_sect);
 
 	rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 	if (rc || !getstatus) {
-		info("%s: no adapter on slot(%s)\n", __func__,
-		     p_slot->name);
+		ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+			  __func__, p_slot->name);
 		mutex_unlock(&p_slot->ctrl->crit_sect);
 		return -ENODEV;
 	}
 	if (MRL_SENS(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -546,8 +557,8 @@ int pciehp_enable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: already enabled on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -571,6 +582,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int ret = 0;
+	struct controller *ctrl = p_slot->ctrl;
 
 	if (!p_slot->ctrl)
 		return 1;
@@ -581,8 +593,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (!HP_SUPR_RM(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: no adapter on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -591,8 +603,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (MRL_SENS(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (ret || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -601,8 +613,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: already disabled slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already disabled slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -618,6 +630,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 int pciehp_sysfs_enable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -631,15 +644,15 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWERON_STATE:
-		info("Slot %s is already in powering on state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering on state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGOFF_STATE:
 	case POWEROFF_STATE:
-		info("Already enabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already enabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
@@ -650,6 +663,7 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 int pciehp_sysfs_disable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -663,15 +677,15 @@ int pciehp_sysfs_disable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
-		info("Slot %s is already in powering off state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering off state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGON_STATE:
 	case POWERON_STATE:
-		info("Already disabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already disabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d934ddee95..8e9530c4c36 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -223,7 +223,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
 
 static inline int pciehp_request_irq(struct controller *ctrl)
 {
-	int retval, irq = ctrl->pci_dev->irq;
+	int retval, irq = ctrl->pcie->irq;
 
 	/* Install interrupt polling timer. Start with 10 sec delay */
 	if (pciehp_poll_mode) {
@@ -235,7 +235,8 @@ static inline int pciehp_request_irq(struct controller *ctrl)
 	/* Installs the interrupt handler */
 	retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
 	if (retval)
-		err("Cannot get irq %d for the hotplug controller\n", irq);
+		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
+			 irq);
 	return retval;
 }
 
@@ -244,7 +245,7 @@ static inline void pciehp_free_irq(struct controller *ctrl)
 	if (pciehp_poll_mode)
 		del_timer_sync(&ctrl->poll_timer);
 	else
-		free_irq(ctrl->pci_dev->irq, ctrl);
+		free_irq(ctrl->pcie->irq, ctrl);
 }
 
 static int pcie_poll_cmd(struct controller *ctrl)
@@ -282,7 +283,7 @@ static void pcie_wait_cmd(struct controller *ctrl, int poll)
 	else
 		rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
 	if (!rc)
-		dbg("Command not completed in 1000 msec\n");
+		ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
 }
 
 /**
@@ -301,7 +302,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		goto out;
 	}
 
@@ -312,26 +314,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 			 * proceed forward to issue the next command according
 			 * to spec. Just print out the error message.
 			 */
-			dbg("%s: CMD_COMPLETED not clear after 1 sec.\n",
-			    __func__);
+			ctrl_dbg(ctrl,
+				 "%s: CMD_COMPLETED not clear after 1 sec.\n",
+				 __func__);
 		} else if (!NO_CMD_CMPL(ctrl)) {
 			/*
 			 * This controller semms to notify of command completed
 			 * event even though it supports none of power
 			 * controller, attention led, power led and EMI.
 			 */
-			dbg("%s: Unexpected CMD_COMPLETED. Need to wait for "
-			    "command completed event.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Need to "
+				 "wait for command completed event.\n",
+				 __func__);
 			ctrl->no_cmd_complete = 0;
 		} else {
-			dbg("%s: Unexpected CMD_COMPLETED. Maybe the "
-			    "controller is broken.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Maybe "
+				 "the controller is broken.\n", __func__);
 		}
 	}
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		goto out;
 	}
 
@@ -341,7 +345,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 	smp_mb();
 	retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
 	if (retval)
-		err("%s: Cannot write to SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot write to SLOTCTRL register\n",
+			 __func__);
 
 	/*
 	 * Wait for command completion.
@@ -370,14 +375,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
-	dbg("%s: lnk_status = %x\n", __func__, lnk_status);
+	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
 	if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) ||
 		!(lnk_status & NEG_LINK_WD)) {
-		err("%s : Link Training Error occurs \n", __func__);
+		ctrl_err(ctrl, "%s : Link Training Error occurs \n", __func__);
 		retval = -1;
 		return retval;
 	}
@@ -394,12 +400,12 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
 
-	dbg("%s: SLOTCTRL %x, value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
 
@@ -433,11 +439,11 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
-	dbg("%s: SLOTCTRL %x value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
 
@@ -464,7 +470,8 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -482,7 +489,8 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	card_state = (u8)((slot_status & PRSN_STATE) >> 6);
@@ -500,7 +508,7 @@ static int hpc_query_power_fault(struct slot *slot)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot check for power fault\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot check for power fault\n", __func__);
 		return retval;
 	}
 	pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
@@ -516,7 +524,7 @@ static int hpc_get_emi_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s : Cannot check EMI status\n", __func__);
+		ctrl_err(ctrl, "%s : Cannot check EMI status\n", __func__);
 		return retval;
 	}
 	*status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
@@ -560,8 +568,8 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
 			return -1;
 	}
 	rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return rc;
 }
@@ -575,8 +583,8 @@ static void hpc_set_green_led_on(struct slot *slot)
 	slot_cmd = 0x0100;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_off(struct slot *slot)
@@ -588,8 +596,8 @@ static void hpc_set_green_led_off(struct slot *slot)
 	slot_cmd = 0x0300;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_blink(struct slot *slot)
@@ -601,8 +609,8 @@ static void hpc_set_green_led_blink(struct slot *slot)
 	slot_cmd = 0x0200;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -613,20 +621,22 @@ static int hpc_power_on_slot(struct slot * slot)
 	u16 slot_status;
 	int retval = 0;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/* Clear sticky power-fault bit from previous power failures */
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	slot_status &= PWR_FAULT_DETECTED;
 	if (slot_status) {
 		retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
 		if (retval) {
-			err("%s: Cannot write to SLOTSTATUS register\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Cannot write to SLOTSTATUS register\n",
+				 __func__);
 			return retval;
 		}
 	}
@@ -644,11 +654,12 @@ static int hpc_power_on_slot(struct slot * slot)
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 
 	if (retval) {
-		err("%s: Write %x command failed!\n", __func__, slot_cmd);
+		ctrl_err(ctrl, "%s: Write %x command failed!\n",
+			 __func__, slot_cmd);
 		return -1;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return retval;
 }
@@ -694,7 +705,7 @@ static int hpc_power_off_slot(struct slot * slot)
 	int retval = 0;
 	int changed;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/*
 	 * Set Bad DLLP Mask bit in Correctable Error Mask
@@ -722,12 +733,12 @@ static int hpc_power_off_slot(struct slot * slot)
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	if (retval) {
-		err("%s: Write command failed!\n", __func__);
+		ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
 		retval = -1;
 		goto out;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
  out:
 	if (changed)
 		pcie_unmask_bad_dllp(ctrl);
@@ -749,7 +760,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	intr_loc = 0;
 	do {
 		if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
-			err("%s: Cannot read SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 
@@ -760,12 +772,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		if (!intr_loc)
 			return IRQ_NONE;
 		if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
-			err("%s: Cannot write to SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 	} while (detected);
 
-	dbg("%s: intr_loc %x\n", __FUNCTION__, intr_loc);
+	ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
 
 	/* Check Command Complete Interrupt Pending */
 	if (intr_loc & CMD_COMPLETED) {
@@ -807,7 +820,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -821,7 +834,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Max link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -836,7 +849,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -871,7 +884,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Max link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Max link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -885,7 +898,8 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -899,7 +913,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Current link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -914,7 +928,8 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -949,7 +964,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Current link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Current link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -998,7 +1013,8 @@ int pcie_enable_notification(struct controller *ctrl)
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 
 	if (pcie_write_cmd(ctrl, cmd, mask)) {
-		err("%s: Cannot enable software notification\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot enable software notification\n",
+			 __func__);
 		return -1;
 	}
 	return 0;
@@ -1010,7 +1026,8 @@ static void pcie_disable_notification(struct controller *ctrl)
 	mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 	if (pcie_write_cmd(ctrl, 0, mask))
-		warn("%s: Cannot disable software notification\n", __func__);
+		ctrl_warn(ctrl, "%s: Cannot disable software notification\n",
+			  __func__);
 }
 
 static int pcie_init_notification(struct controller *ctrl)
@@ -1071,34 +1088,45 @@ static inline void dbg_ctrl(struct controller *ctrl)
 	if (!pciehp_debug)
 		return;
 
-	dbg("Hotplug Controller:\n");
-	dbg("  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n", pci_name(pdev), pdev->irq);
-	dbg("  Vendor ID            : 0x%04x\n", pdev->vendor);
-	dbg("  Device ID            : 0x%04x\n", pdev->device);
-	dbg("  Subsystem ID         : 0x%04x\n", pdev->subsystem_device);
-	dbg("  Subsystem Vendor ID  : 0x%04x\n", pdev->subsystem_vendor);
-	dbg("  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
+	ctrl_info(ctrl, "Hotplug Controller:\n");
+	ctrl_info(ctrl, "  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n",
+		  pci_name(pdev), pdev->irq);
+	ctrl_info(ctrl, "  Vendor ID            : 0x%04x\n", pdev->vendor);
+	ctrl_info(ctrl, "  Device ID            : 0x%04x\n", pdev->device);
+	ctrl_info(ctrl, "  Subsystem ID         : 0x%04x\n",
+		  pdev->subsystem_device);
+	ctrl_info(ctrl, "  Subsystem Vendor ID  : 0x%04x\n",
+		  pdev->subsystem_vendor);
+	ctrl_info(ctrl, "  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		if (!pci_resource_len(pdev, i))
 			continue;
-		dbg("  PCI resource [%d]     : 0x%llx@0x%llx\n", i,
-		    (unsigned long long)pci_resource_len(pdev, i),
-		    (unsigned long long)pci_resource_start(pdev, i));
+		ctrl_info(ctrl, "  PCI resource [%d]     : 0x%llx@0x%llx\n",
+			  i, (unsigned long long)pci_resource_len(pdev, i),
+			  (unsigned long long)pci_resource_start(pdev, i));
 	}
-	dbg("Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-	dbg("  Physical Slot Number : %d\n", ctrl->first_slot);
-	dbg("  Attention Button     : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no");
-	dbg("  Power Controller     : %3s\n", POWER_CTRL(ctrl) ? "yes" : "no");
-	dbg("  MRL Sensor           : %3s\n", MRL_SENS(ctrl)   ? "yes" : "no");
-	dbg("  Attention Indicator  : %3s\n", ATTN_LED(ctrl)   ? "yes" : "no");
-	dbg("  Power Indicator      : %3s\n", PWR_LED(ctrl)    ? "yes" : "no");
-	dbg("  Hot-Plug Surprise    : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
-	dbg("  EMI Present          : %3s\n", EMI(ctrl)        ? "yes" : "no");
-	dbg("  Command Completed    : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+	ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
+	ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+	ctrl_info(ctrl, "  Attention Button     : %3s\n",
+		  ATTN_BUTTN(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Controller     : %3s\n",
+		  POWER_CTRL(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  MRL Sensor           : %3s\n",
+		  MRL_SENS(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Attention Indicator  : %3s\n",
+		  ATTN_LED(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Indicator      : %3s\n",
+		  PWR_LED(ctrl)    ? "yes" : "no");
+	ctrl_info(ctrl, "  Hot-Plug Surprise    : %3s\n",
+		  HP_SUPR_RM(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  EMI Present          : %3s\n",
+		  EMI(ctrl)        ? "yes" : "no");
+	ctrl_info(ctrl, "  Command Completed    : %3s\n",
+		  NO_CMD_CMPL(ctrl) ? "no" : "yes");
 	pciehp_readw(ctrl, SLOTSTATUS, &reg16);
-	dbg("Slot Status            : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Status            : 0x%04x\n", reg16);
 	pciehp_readw(ctrl, SLOTCTRL, &reg16);
-	dbg("Slot Control           : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
 struct controller *pcie_init(struct pcie_device *dev)
@@ -1109,19 +1137,21 @@ struct controller *pcie_init(struct pcie_device *dev)
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
-		err("%s : out of memory\n", __func__);
+		dev_err(&dev->device, "%s : out of memory\n", __func__);
 		goto abort;
 	}
 	INIT_LIST_HEAD(&ctrl->slot_list);
 
+	ctrl->pcie = dev;
 	ctrl->pci_dev = pdev;
 	ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!ctrl->cap_base) {
-		err("%s: Cannot find PCI Express capability\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot find PCI Express capability\n",
+			 __func__);
 		goto abort;
 	}
 	if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
-		err("%s: Cannot read SLOTCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCAP register\n", __func__);
 		goto abort;
 	}
 
@@ -1161,9 +1191,9 @@ struct controller *pcie_init(struct pcie_device *dev)
 			goto abort_ctrl;
 	}
 
-	info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
-	     pdev->vendor, pdev->device,
-	     pdev->subsystem_vendor, pdev->subsystem_device);
+	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+		  pdev->vendor, pdev->device, pdev->subsystem_vendor,
+		  pdev->subsystem_device);
 
 	if (pcie_init_slot(ctrl))
 		goto abort_ctrl;
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 6040dcceb25..ffd11148fbe 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -198,18 +198,20 @@ int pciehp_configure_device(struct slot *p_slot)
 	struct pci_dev *dev;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	int num, fn;
+	struct controller *ctrl = p_slot->ctrl;
 
 	dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (dev) {
-		err("Device %s already exists at %x:%x, cannot hot-add\n",
-				pci_name(dev), p_slot->bus, p_slot->device);
+		ctrl_err(ctrl,
+			 "Device %s already exists at %x:%x, cannot hot-add\n",
+			 pci_name(dev), p_slot->bus, p_slot->device);
 		pci_dev_put(dev);
 		return -EINVAL;
 	}
 
 	num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (num == 0) {
-		err("No new device found\n");
+		ctrl_err(ctrl, "No new device found\n");
 		return -ENODEV;
 	}
 
@@ -218,8 +220,8 @@ int pciehp_configure_device(struct slot *p_slot)
 		if (!dev)
 			continue;
 		if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot hot-add display device %s\n",
-					pci_name(dev));
+			ctrl_err(ctrl, "Cannot hot-add display device %s\n",
+				 pci_name(dev));
 			pci_dev_put(dev);
 			continue;
 		}
@@ -244,9 +246,10 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 	u8 presence = 0;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	u16 command;
+	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: bus/dev = %x/%x\n", __func__, p_slot->bus,
-				p_slot->device);
+	ctrl_dbg(ctrl, "%s: bus/dev = %x/%x\n", __func__,
+		 p_slot->bus, p_slot->device);
 	ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
 	if (ret)
 		presence = 0;
@@ -257,16 +260,17 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 		if (!temp)
 			continue;
 		if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot remove display device %s\n",
-					pci_name(temp));
+			ctrl_err(ctrl, "Cannot remove display device %s\n",
+				 pci_name(temp));
 			pci_dev_put(temp);
 			continue;
 		}
 		if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
 			pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl);
 			if (bctl & PCI_BRIDGE_CTL_VGA) {
-				err("Cannot remove display device %s\n",
-				    pci_name(temp));
+				ctrl_err(ctrl,
+					 "Cannot remove display device %s\n",
+					 pci_name(temp));
 				pci_dev_put(temp);
 				continue;
 			}
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7d5921b1ee7..419919a87b0 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,10 +46,10 @@
 #define PRESENT         1	/* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int debug;
+extern int rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
-		if (debug)					\
+		if (rpaphp_debug)					\
 			printk(KERN_DEBUG "%s: " format,	\
 				MY_NAME , ## arg); 		\
 	} while (0)
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1f84f402acd..95d02a08fdc 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
 				/* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int debug;
+int rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
@@ -50,7 +50,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(debug, bool, 0644);
+module_param_named(debug, rpaphp_debug, bool, 0644);
 
 /**
  * set_attention_status - set attention LED
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 5acfd4f3d4c..513e1e28239 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -123,7 +123,7 @@ int rpaphp_enable_slot(struct slot *slot)
 			slot->state = CONFIGURED;
 		}
 
-		if (debug) {
+		if (rpaphp_debug) {
 			struct pci_dev *dev;
 			dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
 			list_for_each_entry (dev, &bus->devices, bus_list)
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 279c940a003..bf7d6ce9bbb 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -126,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_hi = 0xffffffff;
 
 	irq = create_irq();
-	if (irq < 0) {
+
+	if (irq <= 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2dbf532..8b51e10b778 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -563,7 +563,7 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 
-	/* flush context entry will implictly flush write buffer */
+	/* flush context entry will implicitly flush write buffer */
 	return 0;
 }
 
@@ -656,7 +656,7 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
 			DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
-	/* flush context entry will implictly flush write buffer */
+	/* flush iotlb entry will implicitly flush write buffer */
 	return 0;
 }
 
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 738d4c89581..2de5a3238c9 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -1,3 +1,4 @@
+#include <linux/interrupt.h>
 #include <linux/dmar.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
@@ -11,41 +12,64 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
-static struct {
+struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
 	u16 sub_handle;
 	u8  irte_mask;
-} irq_2_iommu[NR_IRQS];
+};
+
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	return irq_2_iommu(irq);
+}
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-int irq_remapped(int irq)
+static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
 {
-	if (irq > NR_IRQS)
-		return 0;
+	struct irq_2_iommu *irq_iommu;
+
+	irq_iommu = irq_2_iommu(irq);
+
+	if (!irq_iommu)
+		return NULL;
+
+	if (!irq_iommu->iommu)
+		return NULL;
 
-	if (!irq_2_iommu[irq].iommu)
-		return 0;
+	return irq_iommu;
+}
 
-	return 1;
+int irq_remapped(int irq)
+{
+	return valid_irq_2_iommu(irq) != NULL;
 }
 
 int get_irte(int irq, struct irte *entry)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
-	if (!entry || irq > NR_IRQS)
+	if (!entry)
 		return -1;
 
 	spin_lock(&irq_2_ir_lock);
-	if (!irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
-	*entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
+	*entry = *(irq_iommu->iommu->ir_table->base + index);
 
 	spin_unlock(&irq_2_ir_lock);
 	return 0;
@@ -54,6 +78,7 @@ int get_irte(int irq, struct irte *entry)
 int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
+	struct irq_2_iommu *irq_iommu;
 	u16 index, start_index;
 	unsigned int mask = 0;
 	int i;
@@ -61,6 +86,10 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
+	/* protect irq_2_iommu_alloc later */
+	if (irq >= nr_irqs)
+		return -1;
+
 	/*
 	 * start the IRTE search from index 0.
 	 */
@@ -100,10 +129,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	for (i = index; i < index + count; i++)
 		table->base[i].present = 1;
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index =  index;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = mask;
+	irq_iommu = irq_2_iommu_alloc(irq);
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index =  index;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = mask;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -124,31 +154,33 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	*sub_handle = irq_2_iommu[irq].sub_handle;
-	index = irq_2_iommu[irq].irte_index;
+	*sub_handle = irq_iommu->sub_handle;
+	index = irq_iommu->irte_index;
 	spin_unlock(&irq_2_ir_lock);
 	return index;
 }
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
-		spin_unlock(&irq_2_ir_lock);
-		return -1;
-	}
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index = index;
-	irq_2_iommu[irq].sub_handle = subhandle;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu = irq_2_iommu_alloc(irq);
+
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index = index;
+	irq_iommu->sub_handle = subhandle;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -157,16 +189,19 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -178,16 +213,18 @@ int modify_irte(int irq, struct irte *irte_modified)
 	int index;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
 	set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
@@ -203,18 +240,20 @@ int flush_irte(int irq)
 {
 	int index;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 
-	qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+	qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	spin_unlock(&irq_2_ir_lock);
 
 	return 0;
@@ -246,28 +285,30 @@ int free_irte(int irq)
 	int index, i;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-	if (!irq_2_iommu[irq].sub_handle) {
-		for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+	if (!irq_iommu->sub_handle) {
+		for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
 			set_64bit((unsigned long *)irte, 0);
-		qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+		qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4a10b5624f7..d2812013fd2 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -378,23 +378,23 @@ static int msi_capability_init(struct pci_dev *dev)
 	entry->msi_attrib.masked = 1;
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.pos = pos;
-	if (is_mask_bit_support(control)) {
+	if (entry->msi_attrib.maskbit) {
 		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
-				is_64bit_address(control));
+				entry->msi_attrib.is_64);
 	}
 	entry->dev = dev;
 	if (entry->msi_attrib.maskbit) {
 		unsigned int maskbits, temp;
 		/* All MSIs are unmasked by default, Mask them all */
 		pci_read_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
+			msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
 			&maskbits);
 		temp = (1 << multi_msi_capable(control));
 		temp = ((temp - 1) & ~temp);
 		maskbits |= temp;
 		pci_write_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
+			msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
 			maskbits);
 		entry->msi_attrib.maskbits_mask = temp;
 	}
 	list_add_tail(&entry->list, &dev->msi_list);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a13f5348611..b4cdd690ae7 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -43,18 +43,32 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 {
 	struct pci_dynid *dynid;
 	struct pci_driver *pdrv = to_pci_driver(driver);
+	const struct pci_device_id *ids = pdrv->id_table;
 	__u32 vendor, device, subvendor=PCI_ANY_ID,
 		subdevice=PCI_ANY_ID, class=0, class_mask=0;
 	unsigned long driver_data=0;
 	int fields=0;
-	int retval = 0;
+	int retval;
 
-	fields = sscanf(buf, "%x %x %x %x %x %x %lux",
+	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
 			&vendor, &device, &subvendor, &subdevice,
 			&class, &class_mask, &driver_data);
 	if (fields < 2)
 		return -EINVAL;
 
+	/* Only accept driver_data values that match an existing id_table
+	   entry; a driver without an id_table has nothing to check against */
+	retval = ids ? -EINVAL : 0;
+	while (ids && (ids->vendor || ids->subvendor || ids->class_mask)) {
+		if (driver_data == ids->driver_data) {
+			retval = 0;
+			break;
+		}
+		ids++;
+	}
+	if (retval)	/* No match */
+		return retval;
+
 	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
 	if (!dynid)
 		return -ENOMEM;
@@ -65,8 +79,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 	dynid->id.subdevice = subdevice;
 	dynid->id.class = class;
 	dynid->id.class_mask = class_mask;
-	dynid->id.driver_data = pdrv->dynids.use_driver_data ?
-		driver_data : 0UL;
+	dynid->id.driver_data = driver_data;
 
 	spin_lock(&pdrv->dynids.lock);
 	list_add_tail(&dynid->node, &pdrv->dynids.list);
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 77baff022f7..110022d7868 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -423,7 +423,7 @@ pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_read).
  */
-ssize_t
+static ssize_t
 pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
@@ -448,7 +448,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_write).
  */
-ssize_t
+static ssize_t
 pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		    char *buf, loff_t off, size_t count)
 {
@@ -468,11 +468,11 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
  *
- * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap
+ * Uses an arch specific callback, pci_mmap_legacy_page_range(), to mmap
  * legacy memory space (first meg of bus space) into application virtual
  * memory space.
  */
-int
+static int
 pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
@@ -480,7 +480,90 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                                                       struct device,
 						      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma);
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+}
+
+/**
+ * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_page_range() called with
+ * pci_mmap_io, to mmap legacy IO space into application virtual
+ * memory space. Returns -ENOSYS if the operation isn't supported.
+ */
+static int
+pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+		   struct vm_area_struct *vma)
+{
+        struct pci_bus *bus = to_pci_bus(container_of(kobj,
+                                                      struct device,
+						      kobj));
+
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+}
+
+/**
+ * pci_create_legacy_files - create legacy I/O port and memory files
+ * @b: bus to create files under
+ *
+ * Some platforms allow access to legacy I/O port and ISA memory space on
+ * a per-bus basis.  This routine creates the files and ties them into
+ * their associated read, write and mmap handlers in this file.
+ *
+ * On error unwind, but don't propagate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
+ */
+void pci_create_legacy_files(struct pci_bus *b)
+{
+	int error;
+
+	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
+			       GFP_ATOMIC);
+	if (!b->legacy_io)
+		goto kzalloc_err;
+
+	b->legacy_io->attr.name = "legacy_io";
+	b->legacy_io->size = 0xffff;
+	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_io->read = pci_read_legacy_io;
+	b->legacy_io->write = pci_write_legacy_io;
+	b->legacy_io->mmap = pci_mmap_legacy_io;
+	error = device_create_bin_file(&b->dev, b->legacy_io);
+	if (error)
+		goto legacy_io_err;
+
+	/* legacy_mem is the second element of the allocation made above */
+	b->legacy_mem = b->legacy_io + 1;
+	b->legacy_mem->attr.name = "legacy_mem";
+	b->legacy_mem->size = 1024*1024;
+	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_mem->mmap = pci_mmap_legacy_mem;
+	error = device_create_bin_file(&b->dev, b->legacy_mem);
+	if (error)
+		goto legacy_mem_err;
+
+	return;
+
+legacy_mem_err:
+	device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+	kfree(b->legacy_io);
+	b->legacy_io = NULL;
+kzalloc_err:
+	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
+	       "and ISA memory resources to sysfs\n");
+	return;
+}
+
+void pci_remove_legacy_files(struct pci_bus *b)
+{
+	if (b->legacy_io) {
+		device_remove_bin_file(&b->dev, b->legacy_io);
+		device_remove_bin_file(&b->dev, b->legacy_mem);
+		kfree(b->legacy_io); /* legacy_mem shares this allocation */
+	}
 }
 #endif /* HAVE_PCI_LEGACY */
 
@@ -715,7 +798,7 @@ static struct bin_attribute pci_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 256,
+	.size = PCI_CFG_SPACE_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -725,7 +808,7 @@ static struct bin_attribute pcie_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 4096,
+	.size = PCI_CFG_SPACE_EXP_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -735,86 +818,103 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev)
 	return 0;
 }
 
+static int pci_create_capabilities_sysfs(struct pci_dev *dev)
+{
+	int retval;
+	struct bin_attribute *attr;
+
+	/* If the device has VPD, try to expose it in sysfs. */
+	if (dev->vpd) {
+		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
+		if (!attr)
+			return -ENOMEM;
+
+		attr->size = dev->vpd->len;
+		attr->attr.name = "vpd";
+		attr->attr.mode = S_IRUSR | S_IWUSR;
+		attr->read = pci_read_vpd;
+		attr->write = pci_write_vpd;
+		retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
+		if (retval) {
+			kfree(attr);	/* not yet stored in dev->vpd->attr */
+			return retval;
+		}
+		dev->vpd->attr = attr;
+	}
+
+	/* Active State Power Management */
+	pcie_aspm_create_sysfs_dev_files(dev);
+
+	return 0;
+}
+
 int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
-	struct bin_attribute *attr = NULL;
 	int retval;
+	int rom_size = 0;
+	struct bin_attribute *attr;
 
 	if (!sysfs_initialized)
 		return -EACCES;
 
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
 	if (retval)
 		goto err;
 
-	/* If the device has VPD, try to expose it in sysfs. */
-	if (pdev->vpd) {
-		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->vpd->attr = attr;
-			attr->size = pdev->vpd->len;
-			attr->attr.name = "vpd";
-			attr->attr.mode = S_IRUSR | S_IWUSR;
-			attr->read = pci_read_vpd;
-			attr->write = pci_write_vpd;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_vpd;
-		} else {
-			retval = -ENOMEM;
-			goto err_config_file;
-		}
-	}
-
 	retval = pci_create_resource_files(pdev);
 	if (retval)
-		goto err_vpd_file;
+		goto err_config_file;
+
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
 
 	/* If the device has a ROM, try to expose it in sysfs. */
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE) ||
-	    (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)) {
+	if (rom_size) {
 		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->rom_attr = attr;
-			attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
-			attr->attr.name = "rom";
-			attr->attr.mode = S_IRUSR;
-			attr->read = pci_read_rom;
-			attr->write = pci_write_rom;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_rom;
-		} else {
+		if (!attr) {
 			retval = -ENOMEM;
 			goto err_resource_files;
 		}
+		attr->size = rom_size;
+		attr->attr.name = "rom";
+		attr->attr.mode = S_IRUSR;
+		attr->read = pci_read_rom;
+		attr->write = pci_write_rom;
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
+		if (retval) {
+			kfree(attr);
+			goto err_resource_files;
+		}
+		pdev->rom_attr = attr;
 	}
+
 	/* add platform-specific attributes */
-	if (pcibios_add_platform_entries(pdev))
+	retval = pcibios_add_platform_entries(pdev);
+	if (retval)
 		goto err_rom_file;
 
-	pcie_aspm_create_sysfs_dev_files(pdev);
+	/* add sysfs entries for various capabilities */
+	retval = pci_create_capabilities_sysfs(pdev);
+	if (retval)
+		goto err_rom_file;
 
 	return 0;
 
 err_rom_file:
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+	if (rom_size) {
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-err_rom:
-	kfree(pdev->rom_attr);
+		kfree(pdev->rom_attr);
+		pdev->rom_attr = NULL;
+	}
 err_resource_files:
 	pci_remove_resource_files(pdev);
-err_vpd_file:
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-err_vpd:
-		kfree(pdev->vpd->attr);
-	}
 err_config_file:
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -822,6 +922,16 @@ err:
 	return retval;
 }
 
+static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
+{
+	if (dev->vpd && dev->vpd->attr) {	/* "vpd" file exists */
+		sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
+		kfree(dev->vpd->attr);	/* NOTE: attr not reset to NULL */
+	}
+
+	pcie_aspm_remove_sysfs_dev_files(dev);
+}
+
 /**
  * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
  * @pdev: device whose entries we should free
@@ -830,27 +940,28 @@ err:
  */
 void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 {
+	int rom_size = 0;
+
 	if (!sysfs_initialized)
 		return;
 
-	pcie_aspm_remove_sysfs_dev_files(pdev);
+	pci_remove_capabilities_sysfs(pdev);
 
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-		kfree(pdev->vpd->attr);
-	}
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
 
 	pci_remove_resource_files(pdev);
 
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
-		if (pdev->rom_attr) {
-			sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-			kfree(pdev->rom_attr);
-		}
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
+
+	if (rom_size && pdev->rom_attr) {
+		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
+		kfree(pdev->rom_attr);
 	}
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9884bba22d..4db261e13e6 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -213,10 +213,13 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
 int pci_find_ext_capability(struct pci_dev *dev, int cap)
 {
 	u32 header;
-	int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
-	int pos = 0x100;
+	int ttl;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (dev->cfg_size <= 256)
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
 		return 0;
 
 	if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -234,7 +237,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
 			return pos;
 
 		pos = PCI_EXT_CAP_NEXT(header);
-		if (pos < 0x100)
+		if (pos < PCI_CFG_SPACE_SIZE)
 			break;
 
 		if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -1127,6 +1130,27 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
 }
 
 /**
+ * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
+ * @dev: PCI device to prepare
+ * @enable: True to enable wake-up event generation; false to disable
+ *
+ * Lets a driver set up wake-up from D3 with one call: D3_cold is used when
+ * pci_pme_capable() reports PME# support from D3_cold, D3_hot otherwise.
+ * This avoids calling pci_enable_wake() twice in a row to enable wake-up,
+ * which should not be done due to PCI PM vs ACPI ordering constraints.
+ *
+ * This function only returns error code if the device is not capable of
+ * generating PME# from both D3_hot and D3_cold, and the platform is unable to
+ * enable wake-up power for it.
+ */
+int pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	return pci_pme_capable(dev, PCI_D3cold) ?
+			pci_enable_wake(dev, PCI_D3cold, enable) :
+			pci_enable_wake(dev, PCI_D3hot, enable);
+}
+
+/**
  * pci_target_state - find an appropriate low power state for a given PCI dev
  * @dev: PCI device
  *
@@ -1242,25 +1266,25 @@ void pci_pm_init(struct pci_dev *dev)
 	dev->d1_support = false;
 	dev->d2_support = false;
 	if (!pci_no_d1d2(dev)) {
-		if (pmc & PCI_PM_CAP_D1) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
+		if (pmc & PCI_PM_CAP_D1)
 			dev->d1_support = true;
-		}
-		if (pmc & PCI_PM_CAP_D2) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
+		if (pmc & PCI_PM_CAP_D2)
 			dev->d2_support = true;
-		}
+
+		if (dev->d1_support || dev->d2_support)
+			dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
+				   dev->d1_support ? " D1" : "",
+				   dev->d2_support ? " D2" : "");
 	}
 
 	pmc &= PCI_PM_CAP_PME_MASK;
 	if (pmc) {
-		dev_printk(KERN_INFO, &dev->dev,
-			"PME# supported from%s%s%s%s%s\n",
-			(pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
-			(pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
-			(pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
-			(pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
-			(pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+		dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n",
+			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+			 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
 		dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
 		/*
 		 * Make device's PM flags reflect the wake-up capability, but
@@ -1275,6 +1299,38 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 }
 
+/**
+ * pci_enable_ari - enable ARI forwarding if the hardware supports it
+ * @dev: the PCI device (only root ports and downstream ports are acted on)
+ */
+void pci_enable_ari(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap;
+	u16 ctrl;
+
+	if (!dev->is_pcie)
+		return;
+
+	if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
+	    dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
+		return;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return;
+
+	pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_ARI))
+		return;
+
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+	ctrl |= PCI_EXP_DEVCTL2_ARI;
+	pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+	dev->ari_enabled = 1;
+}
+
 int
 pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
@@ -1358,11 +1414,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
 	return 0;
 
 err_out:
-	dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n",
+	dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n",
 		 bar,
 		 pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
-		 (unsigned long long)pci_resource_start(pdev, bar),
-		 (unsigned long long)pci_resource_end(pdev, bar));
+		 &pdev->resource[bar]);
 	return -EBUSY;
 }
 
@@ -1943,6 +1998,7 @@ EXPORT_SYMBOL(pci_restore_state);
 EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_pme_active);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_wake_from_d3);
 EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d807cd786f2..b205ab866a1 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,3 +1,9 @@
+#ifndef DRIVERS_PCI_H
+#define DRIVERS_PCI_H
+
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
+
 /* Functions internal to the PCI core code */
 
 extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
@@ -76,7 +82,13 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
 /* Functions for PCI Hotplug drivers to use */
 extern unsigned int pci_do_scan_bus(struct pci_bus *bus);
 
+#ifdef HAVE_PCI_LEGACY
+extern void pci_create_legacy_files(struct pci_bus *bus);
 extern void pci_remove_legacy_files(struct pci_bus *bus);
+#else
+static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
+static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
+#endif
 
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
@@ -109,6 +121,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 extern int pcie_mch_quirk;
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute dev_attr_cpuaffinity;
+extern struct device_attribute dev_attr_cpulistaffinity;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -144,3 +157,16 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+extern void pci_enable_ari(struct pci_dev *dev);
+/**
+ * pci_ari_enabled - query ARI forwarding status
+ * @dev: the PCI device
+ *
+ * Returns dev->ari_enabled: 1 if ARI forwarding is enabled, 0 otherwise.
+ */
+static inline int pci_ari_enabled(struct pci_dev *dev)
+{
+	return dev->ari_enabled;
+}
+
+#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 77036f46acf..e390707661d 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -105,7 +105,7 @@ static irqreturn_t aer_irq(int irq, void *context)
 	unsigned long flags;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev->port);
+	pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
 	/*
 	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
 	 * and Root error producer/consumer index
@@ -252,7 +252,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	u32 status;
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
@@ -316,7 +316,7 @@ static void aer_error_resume(struct pci_dev *dev)
 	pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
 
 	/* Clean AER Root Error Status */
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
 	if (dev->error_state == pci_channel_io_normal)
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index ee5e7b5176d..dfc63d01f20 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -28,41 +28,15 @@
 static int forceload;
 module_param(forceload, bool, 0);
 
-#define PCI_CFG_SPACE_SIZE	(0x100)
-int pci_find_aer_capability(struct pci_dev *dev)
-{
-	int pos;
-	u32 reg32 = 0;
-
-	/* Check if it's a pci-express device */
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-
-	/* Check if it supports pci-express AER */
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		if (pci_read_config_dword(dev, pos, &reg32))
-			return 0;
-
-		/* some broken boards return ~0 */
-		if (reg32 == 0xffffffff)
-			return 0;
-
-		if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
-			break;
-
-		pos = reg32 >> 20;
-	}
-
-	return pos;
-}
-
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	u16 reg16 = 0;
 	int pos;
 
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return -EIO;
+
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
@@ -102,7 +76,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status, mask;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -123,7 +97,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -502,7 +476,7 @@ static void handle_error_source(struct pcie_device * aerdev,
 		 * Correctable error does not need software intevention.
 		 * No need to go through error recovery process.
 		 */
-		pos = pci_find_aer_capability(dev);
+		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 		if (pos)
 			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
 					info.status);
@@ -542,7 +516,7 @@ void aer_enable_rootport(struct aer_rpc *rpc)
 	reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
 	pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
 
-	aer_pos = pci_find_aer_capability(pdev);
+	aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Clear error status */
 	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
 	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -579,7 +553,7 @@ static void disable_root_aer(struct aer_rpc *rpc)
 	u32 reg32;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
 
@@ -618,7 +592,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* The device might not support AER */
 	if (!pos)
@@ -755,7 +729,6 @@ int aer_init(struct pcie_device *dev)
 	return AER_SUCCESS;
 }
 
-EXPORT_SYMBOL_GPL(pci_find_aer_capability);
 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 851f5b83cdb..8f63f4c6b85 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -528,9 +528,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 		pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
 			&reg32);
 		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
-			printk("Pre-1.1 PCIe device detected, "
-				"disable ASPM for %s. It can be enabled forcedly"
-				" with 'pcie_aspm=force'\n", pci_name(pdev));
+			dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
+				" on pre-1.1 PCIe device.  You can enable it"
+				" with 'pcie_aspm=force'\n");
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 3656e0349dd..2529f3f2ea5 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,7 +25,6 @@
 #define PCIE_CAPABILITIES_REG		0x2
 #define PCIE_SLOT_CAPABILITIES_REG	0x14
 #define PCIE_PORT_DEVICE_MAXSERVICES	4
-#define PCI_CFG_SPACE_SIZE		256
 
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 890f0d2b370..2e091e01482 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -195,24 +195,11 @@ static int get_port_device_capability(struct pci_dev *dev)
 	/* PME Capable - root port capability */
 	if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
 		services |= PCIE_PORT_SERVICE_PME;
-	
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		pci_read_config_dword(dev, pos, &reg32);
-		switch (reg32 & 0xffff) {
-		case PCI_EXT_CAP_ID_ERR:
-			services |= PCIE_PORT_SERVICE_AER;
-			pos = reg32 >> 20;
-			break;
-		case PCI_EXT_CAP_ID_VC:
-			services |= PCIE_PORT_SERVICE_VC;
-			pos = reg32 >> 20;
-			break;
-		default:
-			pos = 0;
-			break;
-		}
-	}
+
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
+		services |= PCIE_PORT_SERVICE_AER;
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
+		services |= PCIE_PORT_SERVICE_VC;
 
 	return services;
 }
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 367c9c20000..584422da8d8 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -91,7 +91,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
 	
 	pci_set_master(dev);
         if (!dev->irq && dev->pin) {
-		dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; "
+		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
 			 "check vendor BIOS\n", dev->vendor, dev->device);
 	}
 	if (pcie_port_device_register(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dd9161a054e..aaaf0a1fed2 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -14,8 +14,6 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
-#define PCI_CFG_SPACE_SIZE	256
-#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -44,72 +42,6 @@ int no_pci_devices(void)
 }
 EXPORT_SYMBOL(no_pci_devices);
 
-#ifdef HAVE_PCI_LEGACY
-/**
- * pci_create_legacy_files - create legacy I/O port and memory files
- * @b: bus to create files under
- *
- * Some platforms allow access to legacy I/O port and ISA memory space on
- * a per-bus basis.  This routine creates the files and ties them into
- * their associated read, write and mmap files from pci-sysfs.c
- *
- * On error unwind, but don't propogate the error to the caller
- * as it is ok to set up the PCI bus without these files.
- */
-static void pci_create_legacy_files(struct pci_bus *b)
-{
-	int error;
-
-	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
-			       GFP_ATOMIC);
-	if (!b->legacy_io)
-		goto kzalloc_err;
-
-	b->legacy_io->attr.name = "legacy_io";
-	b->legacy_io->size = 0xffff;
-	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_io->read = pci_read_legacy_io;
-	b->legacy_io->write = pci_write_legacy_io;
-	error = device_create_bin_file(&b->dev, b->legacy_io);
-	if (error)
-		goto legacy_io_err;
-
-	/* Allocated above after the legacy_io struct */
-	b->legacy_mem = b->legacy_io + 1;
-	b->legacy_mem->attr.name = "legacy_mem";
-	b->legacy_mem->size = 1024*1024;
-	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_mem->mmap = pci_mmap_legacy_mem;
-	error = device_create_bin_file(&b->dev, b->legacy_mem);
-	if (error)
-		goto legacy_mem_err;
-
-	return;
-
-legacy_mem_err:
-	device_remove_bin_file(&b->dev, b->legacy_io);
-legacy_io_err:
-	kfree(b->legacy_io);
-	b->legacy_io = NULL;
-kzalloc_err:
-	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
-	       "and ISA memory resources to sysfs\n");
-	return;
-}
-
-void pci_remove_legacy_files(struct pci_bus *b)
-{
-	if (b->legacy_io) {
-		device_remove_bin_file(&b->dev, b->legacy_io);
-		device_remove_bin_file(&b->dev, b->legacy_mem);
-		kfree(b->legacy_io); /* both are allocated here */
-	}
-}
-#else /* !HAVE_PCI_LEGACY */
-static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
-void pci_remove_legacy_files(struct pci_bus *bus) { return; }
-#endif /* HAVE_PCI_LEGACY */
-
 /*
  * PCI Bus Class Devices
  */
@@ -219,7 +151,7 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
 
 	res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
 
-	if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
+	if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
 		return pci_bar_mem64;
 	return pci_bar_mem32;
 }
@@ -304,9 +236,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 		} else {
 			res->start = l64;
 			res->end = l64 + sz64;
-			printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n",
-				pci_name(dev), pos, (unsigned long long)res->start,
-				(unsigned long long)res->end);
+			dev_printk(KERN_DEBUG, &dev->dev,
+				"reg %x 64bit mmio: %pR\n", pos, res);
 		}
 	} else {
 		sz = pci_size(l, sz, mask);
@@ -316,9 +247,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
 		res->start = l;
 		res->end = l + sz;
-		printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev),
-			pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-			(unsigned long long)res->start, (unsigned long long)res->end);
+
+		dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos,
+			(res->flags & IORESOURCE_IO) ? "io port" : "32bit mmio",
+			res);
 	}
 
  out:
@@ -389,9 +321,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 			res->start = base;
 		if (!res->end)
 			res->end = limit + 0xfff;
-		printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n",
-			pci_name(dev), (unsigned long long) res->start,
-			(unsigned long long) res->end);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res);
 	}
 
 	res = child->resource[1];
@@ -403,9 +333,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n",
-			pci_name(dev), (unsigned long long) res->start,
-			(unsigned long long) res->end);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n",
+			res);
 	}
 
 	res = child->resource[2];
@@ -441,9 +370,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n",
-			pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
-			(unsigned long long) res->start, (unsigned long long) res->end);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
+			(res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
+			res);
 	}
 }
 
@@ -764,7 +693,7 @@ static int pci_setup_device(struct pci_dev * dev)
 	dev->class = class;
 	class >>= 8;
 
-	dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n",
+	dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
 		 dev->vendor, dev->device, class, dev->hdr_type);
 
 	/* "Unknown power state" */
@@ -846,6 +775,11 @@ static int pci_setup_device(struct pci_dev * dev)
 	return 0;
 }
 
+static void pci_release_capabilities(struct pci_dev *dev)
+{
+	pci_vpd_release(dev);
+}
+
 /**
  * pci_release_dev - free a pci device structure when all users of it are finished.
  * @dev: device that's been disconnected
@@ -858,7 +792,7 @@ static void pci_release_dev(struct device *dev)
 	struct pci_dev *pci_dev;
 
 	pci_dev = to_pci_dev(dev);
-	pci_vpd_release(pci_dev);
+	pci_release_capabilities(pci_dev);
 	kfree(pci_dev);
 }
 
@@ -889,8 +823,9 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
 	u32 status;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
 		goto fail;
 	if (status == 0xffffffff)
 		goto fail;
@@ -938,8 +873,6 @@ struct pci_dev *alloc_pci_dev(void)
 
 	INIT_LIST_HEAD(&dev->bus_list);
 
-	pci_msi_init_pci_dev(dev);
-
 	return dev;
 }
 EXPORT_SYMBOL(alloc_pci_dev);
@@ -951,6 +884,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 {
 	struct pci_dev *dev;
+	struct pci_slot *slot;
 	u32 l;
 	u8 hdr_type;
 	int delay = 1;
@@ -999,6 +933,10 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
 
+	list_for_each_entry(slot, &bus->slots, list)
+		if (PCI_SLOT(devfn) == slot->number)
+			dev->slot = slot;
+
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
 	dev->dma_mask = 0xffffffff;
@@ -1007,9 +945,22 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 		return NULL;
 	}
 
+	return dev;
+}
+
+static void pci_init_capabilities(struct pci_dev *dev)
+{
+	/* MSI/MSI-X list */
+	pci_msi_init_pci_dev(dev);
+
+	/* Power Management */
+	pci_pm_init(dev);
+
+	/* Vital Product Data */
 	pci_vpd_pci22_init(dev);
 
-	return dev;
+	/* Alternative Routing-ID Forwarding */
+	pci_enable_ari(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1028,8 +979,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	/* Fix up broken headers */
 	pci_fixup_device(pci_fixup_header, dev);
 
-	/* Initialize power management of the device */
-	pci_pm_init(dev);
+	/* Initialize various capabilities */
+	pci_init_capabilities(dev);
 
 	/*
 	 * Add the device to our list of discovered devices
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e872ac925b4..bbf66ea8fd8 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,14 @@
 #include <linux/kallsyms.h>
 #include "pci.h"
 
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+int pcie_mch_quirk;
+EXPORT_SYMBOL(pcie_mch_quirk);
+
+#ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
@@ -62,8 +70,6 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_p
     
     This appears to be BIOS not version dependent. So presumably there is a 
     chipset level fix */
-int isa_dma_bridge_buggy;
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
     
 static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
 {
@@ -84,9 +90,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_1,	quirk_isa_d
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_2,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_dma_hangs);
 
-int pci_pci_problems;
-EXPORT_SYMBOL(pci_pci_problems);
-
 /*
  *	Chipsets where PCI->PCI transfers vanish or hang
  */
@@ -1362,9 +1365,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
 #endif
 
-int pcie_mch_quirk;
-EXPORT_SYMBOL(pcie_mch_quirk);
-
 static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
 {
 	pcie_mch_quirk = 1;
@@ -1555,84 +1555,6 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
 
-static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
-{
-	while (f < end) {
-		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
- 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
-			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
-			f->hook(dev);
-		}
-		f++;
-	}
-}
-
-extern struct pci_fixup __start_pci_fixups_early[];
-extern struct pci_fixup __end_pci_fixups_early[];
-extern struct pci_fixup __start_pci_fixups_header[];
-extern struct pci_fixup __end_pci_fixups_header[];
-extern struct pci_fixup __start_pci_fixups_final[];
-extern struct pci_fixup __end_pci_fixups_final[];
-extern struct pci_fixup __start_pci_fixups_enable[];
-extern struct pci_fixup __end_pci_fixups_enable[];
-extern struct pci_fixup __start_pci_fixups_resume[];
-extern struct pci_fixup __end_pci_fixups_resume[];
-extern struct pci_fixup __start_pci_fixups_resume_early[];
-extern struct pci_fixup __end_pci_fixups_resume_early[];
-extern struct pci_fixup __start_pci_fixups_suspend[];
-extern struct pci_fixup __end_pci_fixups_suspend[];
-
-
-void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
-{
-	struct pci_fixup *start, *end;
-
-	switch(pass) {
-	case pci_fixup_early:
-		start = __start_pci_fixups_early;
-		end = __end_pci_fixups_early;
-		break;
-
-	case pci_fixup_header:
-		start = __start_pci_fixups_header;
-		end = __end_pci_fixups_header;
-		break;
-
-	case pci_fixup_final:
-		start = __start_pci_fixups_final;
-		end = __end_pci_fixups_final;
-		break;
-
-	case pci_fixup_enable:
-		start = __start_pci_fixups_enable;
-		end = __end_pci_fixups_enable;
-		break;
-
-	case pci_fixup_resume:
-		start = __start_pci_fixups_resume;
-		end = __end_pci_fixups_resume;
-		break;
-
-	case pci_fixup_resume_early:
-		start = __start_pci_fixups_resume_early;
-		end = __end_pci_fixups_resume_early;
-		break;
-
-	case pci_fixup_suspend:
-		start = __start_pci_fixups_suspend;
-		end = __end_pci_fixups_suspend;
-		break;
-
-	default:
-		/* stupid compiler warning, you would think with an enum... */
-		return;
-	}
-	pci_do_fixups(dev, start, end);
-}
-EXPORT_SYMBOL(pci_fixup_device);
-
 /* Enable 1k I/O space granularity on the Intel P64H2 */
 static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
 {
@@ -2006,3 +1928,82 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
 			quirk_msi_intx_disable_bug);
 
 #endif /* CONFIG_PCI_MSI */
+
+static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
+{
+	while (f < end) {
+		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+ 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
+			f->hook(dev);
+		}
+		f++;
+	}
+}
+
+extern struct pci_fixup __start_pci_fixups_early[];
+extern struct pci_fixup __end_pci_fixups_early[];
+extern struct pci_fixup __start_pci_fixups_header[];
+extern struct pci_fixup __end_pci_fixups_header[];
+extern struct pci_fixup __start_pci_fixups_final[];
+extern struct pci_fixup __end_pci_fixups_final[];
+extern struct pci_fixup __start_pci_fixups_enable[];
+extern struct pci_fixup __end_pci_fixups_enable[];
+extern struct pci_fixup __start_pci_fixups_resume[];
+extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
+
+
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
+{
+	struct pci_fixup *start, *end;
+
+	switch(pass) {
+	case pci_fixup_early:
+		start = __start_pci_fixups_early;
+		end = __end_pci_fixups_early;
+		break;
+
+	case pci_fixup_header:
+		start = __start_pci_fixups_header;
+		end = __end_pci_fixups_header;
+		break;
+
+	case pci_fixup_final:
+		start = __start_pci_fixups_final;
+		end = __end_pci_fixups_final;
+		break;
+
+	case pci_fixup_enable:
+		start = __start_pci_fixups_enable;
+		end = __end_pci_fixups_enable;
+		break;
+
+	case pci_fixup_resume:
+		start = __start_pci_fixups_resume;
+		end = __end_pci_fixups_resume;
+		break;
+
+	case pci_fixup_resume_early:
+		start = __start_pci_fixups_resume_early;
+		end = __end_pci_fixups_resume_early;
+		break;
+
+	case pci_fixup_suspend:
+		start = __start_pci_fixups_suspend;
+		end = __end_pci_fixups_suspend;
+		break;
+
+	default:
+		/* stupid compiler warning, you would think with an enum... */
+		return;
+	}
+	pci_do_fixups(dev, start, end);
+}
+#else
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
+#endif
+EXPORT_SYMBOL(pci_fixup_device);
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index bdc2a44d68e..042e0892442 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,6 +73,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)
 	up_write(&pci_bus_sem);
 	pci_remove_legacy_files(pci_bus);
 	device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
+	device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
 	device_unregister(&pci_bus->dev);
 }
 EXPORT_SYMBOL(pci_remove_bus);
@@ -114,13 +115,9 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
 {
 	struct list_head *l, *n;
 
-	if (dev->subordinate) {
-		list_for_each_safe(l, n, &dev->subordinate->devices) {
-			struct pci_dev *dev = pci_dev_b(l);
-
-			pci_remove_bus_device(dev);
-		}
-	}
+	if (dev->subordinate)
+		list_for_each_safe(l, n, &dev->subordinate->devices)
+			pci_remove_bus_device(pci_dev_b(l));
 }
 
 static void pci_stop_bus_devices(struct pci_bus *bus)
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index bd5c0e03139..1f5f6143f35 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -21,7 +21,7 @@
  * between the ROM and other resources, so enabling it may disable access
  * to MMIO registers or other card memory.
  */
-static int pci_enable_rom(struct pci_dev *pdev)
+int pci_enable_rom(struct pci_dev *pdev)
 {
 	struct resource *res = pdev->resource + PCI_ROM_RESOURCE;
 	struct pci_bus_region region;
@@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev)
  * Disable ROM decoding on a PCI device by turning off the last bit in the
  * ROM BAR.
  */
-static void pci_disable_rom(struct pci_dev *pdev)
+void pci_disable_rom(struct pci_dev *pdev)
 {
 	u32 rom_addr;
 	pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
@@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev)
 
 EXPORT_SYMBOL(pci_map_rom);
 EXPORT_SYMBOL(pci_unmap_rom);
+EXPORT_SYMBOL_GPL(pci_enable_rom);
+EXPORT_SYMBOL_GPL(pci_disable_rom);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d5e2106760f..ea979f2bc6d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -299,7 +299,7 @@ static void pbus_size_io(struct pci_bus *bus)
 
 			if (r->parent || !(r->flags & IORESOURCE_IO))
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 
 			if (r_size < 0x400)
 				/* Might be re-aligned for ISA */
@@ -350,16 +350,13 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
 			if (r->parent || (r->flags & mask) != type)
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 			/* For bridges size != alignment */
 			align = resource_alignment(r);
 			order = __ffs(align) - 20;
 			if (order > 11) {
 				dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
-				       "%#016llx-%#016llx\n", i,
-				       (unsigned long long)align,
-				       (unsigned long long)r->start,
-				       (unsigned long long)r->end);
+					 "%pR\n", i, (unsigned long long)align, r);
 				r->flags = 0;
 				continue;
 			}
@@ -539,11 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus)
                 if (!res)
                         continue;
 
-		printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n",
-			bus->number, i,
-			(res->flags & IORESOURCE_IO) ? "io port" : "mmio",
-			(unsigned long long) res->start,
-			(unsigned long long) res->end);
+		printk(KERN_INFO "bus: %02x index %x %s: %pR\n",
+		       bus->number, i,
+		       (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res);
         }
 }
 
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1a5fc83c71b..2dbd96cce2d 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -49,10 +49,8 @@ void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
 
 	pcibios_resource_to_bus(dev, &region, res);
 
-	dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] "
-		"flags %#lx\n", resno,
-		 (unsigned long long)res->start,
-		 (unsigned long long)res->end,
+	dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] "
+		"flags %#lx\n", resno, res,
 		 (unsigned long long)region.start,
 		 (unsigned long long)region.end,
 		 (unsigned long)res->flags);
@@ -114,13 +112,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 		err = insert_resource(root, res);
 
 	if (err) {
-		dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n",
+		dev_err(&dev->dev, "BAR %d: %s of %s %pR\n",
 			resource,
 			root ? "address space collision on" :
 				"no parent found for",
-			dtype,
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+			dtype, res);
 	}
 
 	return err;
@@ -133,15 +129,14 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	resource_size_t size, min, align;
 	int ret;
 
-	size = res->end - res->start + 1;
+	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
 	align = resource_alignment(res);
 	if (!align) {
 		dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
-			"alignment) [%#llx-%#llx] flags %#lx\n",
-			resno, (unsigned long long)res->start,
-			(unsigned long long)res->end, res->flags);
+			"alignment) %pR flags %#lx\n",
+			resno, res, res->flags);
 		return -EINVAL;
 	}
 
@@ -162,11 +157,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	}
 
 	if (ret) {
-		dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-			"[%#llx-%#llx]\n", resno,
-			res->flags & IORESOURCE_IO ? "I/O" : "mem",
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+		dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+			resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
 	} else {
 		res->flags &= ~IORESOURCE_STARTALIGN;
 		if (resno < PCI_BRIDGE_RESOURCES)
@@ -202,11 +194,8 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
 	}
 
 	if (ret) {
-		dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-			"[%#llx-%#llx\n]", resno,
-			res->flags & IORESOURCE_IO ? "I/O" : "mem",
-			(unsigned long long)res->start,
-			(unsigned long long)res->end);
+		dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+			resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
 	} else if (resno < PCI_BRIDGE_RESOURCES) {
 		pci_update_resource(dev, res, resno);
 	}
@@ -237,9 +226,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 		r_align = resource_alignment(r);
 		if (!r_align) {
 			dev_warn(&dev->dev, "BAR %d: bogus alignment "
-				"[%#llx-%#llx] flags %#lx\n",
-				i, (unsigned long long)r->start,
-				(unsigned long long)r->end, r->flags);
+				"%pR flags %#lx\n",
+				i, r, r->flags);
 			continue;
 		}
 		for (list = head; ; list = list->next) {
@@ -287,9 +275,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
 
 		if (!r->parent) {
 			dev_err(&dev->dev, "device not available because of "
-				"BAR %d [%#llx-%#llx] collisions\n", i,
-				(unsigned long long) r->start,
-				(unsigned long long) r->end);
+				"BAR %d %pR collisions\n", i, r);
 			return -EINVAL;
 		}
 
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 7e5b85cbd94..0c6db03698e 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -49,11 +49,16 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 
 static void pci_slot_release(struct kobject *kobj)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot = to_pci_slot(kobj);
 
 	pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
 		 slot->bus->number, slot->number);
 
+	list_for_each_entry(dev, &slot->bus->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = NULL;
+
 	list_del(&slot->list);
 
 	kfree(slot);
@@ -108,6 +113,7 @@ static struct kobj_type pci_slot_ktype = {
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot;
 	int err;
 
@@ -150,6 +156,10 @@ placeholder:
 	INIT_LIST_HEAD(&slot->list);
 	list_add(&slot->list, &parent->slots);
 
+	list_for_each_entry(dev, &parent->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot_nr)
+			dev->slot = slot;
+
 	/* Don't care if debug printk has a -1 for slot_nr */
 	pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
 		 __func__, pci_domain_nr(parent), parent->number, slot_nr);
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index b46c60b7270..23e492bf75c 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -70,7 +70,7 @@ pxa2xx-obj-$(CONFIG_MACH_MAINSTONE)		+= pxa2xx_mainstone.o
 pxa2xx-obj-$(CONFIG_PXA_SHARPSL)		+= pxa2xx_sharpsl.o
 pxa2xx-obj-$(CONFIG_MACH_ARMCORE)		+= pxa2xx_cm_x2xx_cs.o
 pxa2xx-obj-$(CONFIG_ARCH_VIPER)			+= pxa2xx_viper.o
-pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA)		+= pxa2xx_trizeps.o
+pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA)		+= pxa2xx_trizeps4.o
 pxa2xx-obj-$(CONFIG_MACH_PALMTX)		+= pxa2xx_palmtx.o
 pxa2xx-obj-$(CONFIG_MACH_PALMLD)		+= pxa2xx_palmld.o
 
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index a0ffb8ebfe0..9e1140f085f 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -273,7 +273,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
 			goto fail0d;
 		cf->socket.pci_irq = board->irq_pin;
 	} else
-		cf->socket.pci_irq = NR_IRQS + 1;
+		cf->socket.pci_irq = nr_irqs + 1;
 
 	/* pcmcia layer only remaps "real" memory not iospace */
 	cf->socket.io_offset = (unsigned long)
diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
index 117dc12ab43..9ef69cdb318 100644
--- a/drivers/pcmcia/hd64465_ss.c
+++ b/drivers/pcmcia/hd64465_ss.c
@@ -233,15 +233,18 @@ static struct hw_interrupt_type hd64465_ss_irq_type = {
  */
 static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 
+	desc = irq_to_desc(irq);
     	hs_mapped_irq[irq].sock = sp;
 	/* insert ourselves as the irq controller */
-	hs_mapped_irq[irq].old_handler = irq_desc[irq].chip;
-	irq_desc[irq].chip = &hd64465_ss_irq_type;
+	hs_mapped_irq[irq].old_handler = desc->chip;
+	desc->chip = &hd64465_ss_irq_type;
 }
 
 
@@ -250,13 +253,16 @@ static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
  */
 static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 		
+	desc = irq_to_desc(irq);
 	/* restore the original irq controller */
-	irq_desc[irq].chip = hs_mapped_irq[irq].old_handler;
+	desc->chip = hs_mapped_irq[irq].old_handler;
 }
 
 /*============================================================*/
diff --git a/drivers/pcmcia/vrc4171_card.c b/drivers/pcmcia/vrc4171_card.c
index eee2f1cb213..b2c41241905 100644
--- a/drivers/pcmcia/vrc4171_card.c
+++ b/drivers/pcmcia/vrc4171_card.c
@@ -639,7 +639,7 @@ static int __devinit vrc4171_card_setup(char *options)
 		int irq;
 		options += 4;
 		irq = simple_strtoul(options, &options, 0);
-		if (irq >= 0 && irq < NR_IRQS)
+		if (irq >= 0 && irq < nr_irqs)
 			vrc4171_irq = irq;
 
 		if (*options != ',')
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 63bb5791044..8e0c2b47803 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -51,7 +51,7 @@ config BATTERY_OLPC
 
 config BATTERY_TOSA
 	tristate "Sharp SL-6000 (tosa) battery"
-	depends on MACH_TOSA && MFD_TC6393XB
+	depends on MACH_TOSA && MFD_TC6393XB && TOUCHSCREEN_WM97XX
 	help
 	  Say Y to enable support for the battery on the Sharp Zaurus
 	  SL-6000 (tosa) models.
@@ -62,4 +62,10 @@ config BATTERY_WM97XX
 	help
 	  Say Y to enable support for battery measured by WM97xx aux port.
 
+config BATTERY_BQ27x00
+	tristate "BQ27200 battery driver"
+	depends on I2C
+	help
+	  Say Y here to enable support for batteries with BQ27200(I2C) chip.
+
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 4e20026cc45..e8f1ecec5d8 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -21,4 +21,5 @@ obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
 obj-$(CONFIG_BATTERY_PMU)	+= pmu_battery.o
 obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
-obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
-\ No newline at end of file
+obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
+obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
new file mode 100644
index 00000000000..0c056fcc01c
--- /dev/null
+++ b/drivers/power/bq27x00_battery.c
@@ -0,0 +1,381 @@
+/*
+ * BQ27x00 battery driver
+ *
+ * Copyright (C) 2008 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (C) 2008 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc.
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/idr.h>
+#include <linux/i2c.h>
+#include <asm/unaligned.h>
+
+#define DRIVER_VERSION			"1.0.0"
+
+#define BQ27x00_REG_TEMP		0x06
+#define BQ27x00_REG_VOLT		0x08
+#define BQ27x00_REG_RSOC		0x0B /* Relative State-of-Charge */
+#define BQ27x00_REG_AI			0x14
+#define BQ27x00_REG_FLAGS		0x0A
+
+/* If the system has several batteries we need a different name for each
+ * of them...
+ */
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
+struct bq27x00_device_info;
+struct bq27x00_access_methods {
+	int (*read)(u8 reg, int *rt_value, int b_single,
+		struct bq27x00_device_info *di);
+};
+
+struct bq27x00_device_info {
+	struct device 		*dev;		/* &client->dev; used for logging */
+	int			id;		/* index taken from battery_id */
+	int			voltage_uV;	/* not referenced in this file */
+	int			current_uA;	/* not referenced in this file */
+	int			temp_C;		/* not referenced in this file */
+	int			charge_rsoc;	/* not referenced in this file */
+	struct bq27x00_access_methods	*bus;	/* register read backend */
+	struct power_supply	bat;		/* registered in probe */
+
+	struct i2c_client	*client;	/* I2C device, set in probe */
+};
+
+static enum power_supply_property bq27x00_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_TEMP,
+};
+
+/*
+ * Common code for BQ27x00 devices
+ */
+
+static int bq27x00_read(u8 reg, int *rt_value, int b_single,
+			struct bq27x00_device_info *di)
+{
+	/* Delegate to the bus backend, then apply be16_to_cpu() in place. */
+	int status = di->bus->read(reg, rt_value, b_single, di);
+
+	*rt_value = be16_to_cpu(*rt_value);
+
+	return status;
+}
+
+/*
+ * Return the battery temperature in tenths of a degree Celsius, the unit
+ * the power-supply class uses for TEMP, or < 0 if the register read fails.
+ */
+static int bq27x00_battery_temperature(struct bq27x00_device_info *di)
+{
+	int ret;
+	int temp = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_TEMP, &temp, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading temperature\n");
+		return ret;
+	}
+	/* (temp >> 2) - 273 is whole degrees Celsius; scale to 0.1 degC. */
+	return ((temp >> 2) - 273) * 10;
+}
+
+/*
+ * Return the battery voltage in microvolts, the unit the power-supply
+ * class uses for VOLTAGE_NOW, or < 0 if the register read fails.
+ */
+static int bq27x00_battery_voltage(struct bq27x00_device_info *di)
+{
+	int ret;
+	int volt = 0;
+
+	ret = bq27x00_read(BQ27x00_REG_VOLT, &volt, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading voltage\n");
+		return ret;
+	}
+	/* The value read here is in millivolts; report microvolts. */
+	return volt * 1000;
+}
+
+/*
+ * Read the average-current register and return it with the sign
+ * taken from bit 7 of the flags register (set means negative).
+ * Any read failure is reported as a current of 0.
+ */
+static int bq27x00_battery_current(struct bq27x00_device_info *di)
+{
+	int magnitude = 0;
+	int status_flags = 0;
+	int err;
+
+	err = bq27x00_read(BQ27x00_REG_AI, &magnitude, 0, di);
+	if (err != 0) {
+		dev_err(di->dev, "error reading current\n");
+		return 0;
+	}
+	err = bq27x00_read(BQ27x00_REG_FLAGS, &status_flags, 0, di);
+	if (err < 0) {
+		dev_err(di->dev, "error reading flags\n");
+		return 0;
+	}
+	if (!(status_flags & (1 << 7)))
+		return magnitude;
+
+	dev_dbg(di->dev, "negative current!\n");
+	return -magnitude;
+}
+
+/*
+ * Single-byte read of the Relative State-of-Charge register.
+ * On success the value is returned shifted down by 8 bits, which
+ * drops the low byte of what bq27x00_read() handed back.
+ * On failure the read error code is returned instead.
+ */
+static int bq27x00_battery_rsoc(struct bq27x00_device_info *di)
+{
+	int raw = 0;
+	int err = bq27x00_read(BQ27x00_REG_RSOC, &raw, 1, di);
+
+	if (err == 0)
+		return raw >> 8;
+
+	dev_err(di->dev, "error reading relative State-of-Charge\n");
+	return err;
+}
+
+#define to_bq27x00_device_info(x) /* power_supply -> device_info */ \
+	container_of((x), struct bq27x00_device_info, bat)
+
+static int bq27x00_battery_get_property(struct power_supply *psy,
+					enum power_supply_property psp,
+					union power_supply_propval *val)
+{
+	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_PRESENT:
+		/* Any positive voltage reading counts as "present". */
+		val->intval = bq27x00_battery_voltage(di) > 0;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = bq27x00_battery_voltage(di);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = bq27x00_battery_current(di);
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = bq27x00_battery_rsoc(di);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		val->intval = bq27x00_battery_temperature(di);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bq27x00_powersupply_init(struct bq27x00_device_info *di)
+{
+	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	di->bat.properties = bq27x00_battery_props;
+	di->bat.num_properties = ARRAY_SIZE(bq27x00_battery_props);
+	di->bat.get_property = bq27x00_battery_get_property;
+	di->bat.external_power_changed = NULL;	/* no callback installed */
+}
+
+/*
+ * BQ27200 specific code
+ */
+
+/*
+ * Read register @reg over I2C as a write of the register address followed
+ * by a 1-byte (@b_single != 0) or 2-byte read; returns 0 or a negative value.
+ */
+static int bq27200_read(u8 reg, int *rt_value, int b_single,
+			struct bq27x00_device_info *di)
+{
+	struct i2c_client *client = di->client;
+	unsigned char data[2];
+	struct i2c_msg msg;
+	int err;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	/* First transfer: write the one-byte register address. */
+	data[0] = reg;
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = 1;
+	msg.buf = data;
+	err = i2c_transfer(client->adapter, &msg, 1);
+	if (err < 0)
+		return err;
+
+	/* Second transfer: read one or two bytes back into data[]. */
+	msg.len = b_single ? 1 : 2;
+	msg.flags = I2C_M_RD;
+	err = i2c_transfer(client->adapter, &msg, 1);
+	if (err < 0)
+		return err;
+
+	if (b_single)
+		*rt_value = data[0];
+	else
+		*rt_value = get_unaligned_be16(data);
+	return 0;
+}
+
+static int bq27200_battery_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	char *name;
+	struct bq27x00_device_info *di;
+	struct bq27x00_access_methods *bus;
+	int num;
+	int retval = 0;
+
+	/* Get a new ID for this battery; battery_mutex guards the idr */
+	retval = idr_pre_get(&battery_id, GFP_KERNEL);
+	if (retval == 0)
+		return -ENOMEM;
+	mutex_lock(&battery_mutex);
+	retval = idr_get_new(&battery_id, client, &num);
+	mutex_unlock(&battery_mutex);
+	if (retval < 0)
+		return retval;
+	/* The ID becomes part of the power-supply name ("bq27200-<id>") */
+	name = kasprintf(GFP_KERNEL, "bq27200-%d", num);
+	if (!name) {
+		dev_err(&client->dev, "failed to allocate device name\n");
+		retval = -ENOMEM;
+		goto batt_failed_1;
+	}
+
+	di = kzalloc(sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&client->dev, "failed to allocate device info data\n");
+		retval = -ENOMEM;
+		goto batt_failed_2;
+	}
+	di->id = num;
+
+	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+	if (!bus) {
+		dev_err(&client->dev, "failed to allocate access method "
+					"data\n");
+		retval = -ENOMEM;
+		goto batt_failed_3;
+	}
+	/* Link client, device state and bus backend before registering */
+	i2c_set_clientdata(client, di);
+	di->dev = &client->dev;
+	di->bat.name = name;
+	bus->read = &bq27200_read;
+	di->bus = bus;
+	di->client = client;
+
+	bq27x00_powersupply_init(di);
+
+	retval = power_supply_register(&client->dev, &di->bat);
+	if (retval) {
+		dev_err(&client->dev, "failed to register battery\n");
+		goto batt_failed_4;
+	}
+
+	dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION);
+
+	return 0;
+	/* Error unwind: free in reverse order of allocation */
+batt_failed_4:
+	kfree(bus);
+batt_failed_3:
+	kfree(di);
+batt_failed_2:
+	kfree(name);
+batt_failed_1:
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, num);
+	mutex_unlock(&battery_mutex);
+
+	return retval;
+}
+
+static int bq27200_battery_remove(struct i2c_client *client)
+{
+	struct bq27x00_device_info *di = i2c_get_clientdata(client);
+
+	/* Stop exposing the battery before freeing what backs it. */
+	power_supply_unregister(&di->bat);
+	kfree(di->bat.name);
+	/* The access-method table is a separate allocation made in probe. */
+	kfree(di->bus);
+
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, di->id);
+	mutex_unlock(&battery_mutex);
+	kfree(di);
+	return 0;
+}
+
+/*
+ * Module stuff
+ */
+
+static const struct i2c_device_id bq27200_id[] = {
+	{ "bq27200", 0 },
+	{},
+};
+
+static struct i2c_driver bq27200_battery_driver = {
+	.driver = {
+		.name = "bq27200-battery",
+	},
+	.probe = bq27200_battery_probe,
+	.remove = bq27200_battery_remove,
+	.id_table = bq27200_id,
+};
+
+static int __init bq27x00_battery_init(void)
+{
+	/* Register the I2C driver; a failure is logged and passed back. */
+	int status = i2c_add_driver(&bq27200_battery_driver);
+
+	if (status != 0)
+		printk(KERN_ERR "Unable to register BQ27200 driver\n");
+
+	return status;
+}
+module_init(bq27x00_battery_init);
+
+static void __exit bq27x00_battery_exit(void)
+{
+	i2c_del_driver(&bq27200_battery_driver);
+}
+module_exit(bq27x00_battery_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("BQ27x00 battery monitor driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 0471ec743ab..d30bb766fce 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -334,13 +334,16 @@ static int pda_power_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
+static int ac_wakeup_enabled;
+static int usb_wakeup_enabled;
+
 static int pda_power_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	if (device_may_wakeup(&pdev->dev)) {
 		if (ac_irq)
-			enable_irq_wake(ac_irq->start);
+			ac_wakeup_enabled = !enable_irq_wake(ac_irq->start);
 		if (usb_irq)
-			enable_irq_wake(usb_irq->start);
+			usb_wakeup_enabled = !enable_irq_wake(usb_irq->start);
 	}
 
 	return 0;
@@ -349,9 +352,9 @@ static int pda_power_suspend(struct platform_device *pdev, pm_message_t state)
 static int pda_power_resume(struct platform_device *pdev)
 {
 	if (device_may_wakeup(&pdev->dev)) {
-		if (usb_irq)
+		if (usb_irq && usb_wakeup_enabled)
 			disable_irq_wake(usb_irq->start);
-		if (ac_irq)
+		if (ac_irq && ac_wakeup_enabled)
 			disable_irq_wake(ac_irq->start);
 	}
 
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 3007695f90c..5520040449c 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -87,6 +87,30 @@ int power_supply_am_i_supplied(struct power_supply *psy)
 	return error;
 }
 
+/* Per-device callback: ONLINE value of a non-battery supply, else 0. */
+static int __power_supply_is_system_supplied(struct device *dev, void *data)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	union power_supply_propval online = {0,};
+
+	/* battery-type supplies are ignored */
+	if (psy->type == POWER_SUPPLY_TYPE_BATTERY)
+		return 0;
+	if (psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &online))
+		return 0;	/* property could not be read */
+	return online.intval;
+}
+
+/*
+ * Run __power_supply_is_system_supplied() over the power_supply class
+ * and hand back class_for_each_device()'s result unchanged.
+ */
+int power_supply_is_system_supplied(void)
+{
+	return class_for_each_device(power_supply_class, NULL, NULL,
+				     __power_supply_is_system_supplied);
+}
+
 int power_supply_register(struct device *parent, struct power_supply *psy)
 {
 	int rc = 0;
@@ -148,6 +172,7 @@ static void __exit power_supply_class_exit(void)
 
 EXPORT_SYMBOL_GPL(power_supply_changed);
 EXPORT_SYMBOL_GPL(power_supply_am_i_supplied);
+EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
 EXPORT_SYMBOL_GPL(power_supply_register);
 EXPORT_SYMBOL_GPL(power_supply_unregister);
 
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index fe2aeb11939..23ae8460f5c 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -30,7 +30,7 @@
 
 #define POWER_SUPPLY_ATTR(_name)					\
 {									\
-	.attr = { .name = #_name, .mode = 0444, .owner = THIS_MODULE },	\
+	.attr = { .name = #_name, .mode = 0444 },	\
 	.show = power_supply_show_property,				\
 	.store = NULL,							\
 }
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 6f2f90ebb02..06848b254d5 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -915,6 +915,22 @@ int ps3av_video_mute(int mute)
 
 EXPORT_SYMBOL_GPL(ps3av_video_mute);
 
+/* mute analog output only: the AV-multi ports, indexed after the HDMI ports */
+int ps3av_audio_mute_analog(int mute)
+{
+	int i, res;
+
+	for (i = 0; i < ps3av->av_hw_conf.num_of_avmulti; i++) {
+		res = ps3av_cmd_av_audio_mute(1,
+			&ps3av->av_port[i + ps3av->av_hw_conf.num_of_hdmi],
+			mute);
+		if (res < 0)
+			return -1;	/* any failure is reported as -1 */
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ps3av_audio_mute_analog);
+
 int ps3av_audio_mute(int mute)
 {
 	return ps3av_set_audio_mute(mute ? PS3AV_CMD_MUTE_ON
diff --git a/drivers/ps3/ps3av_cmd.c b/drivers/ps3/ps3av_cmd.c
index 7f880c26122..11eb50318fe 100644
--- a/drivers/ps3/ps3av_cmd.c
+++ b/drivers/ps3/ps3av_cmd.c
@@ -660,9 +660,10 @@ u32 ps3av_cmd_set_av_audio_param(void *p, u32 port,
 }
 
 /* default cs val */
-static const u8 ps3av_mode_cs_info[] = {
+u8 ps3av_mode_cs_info[] = {
 	0x00, 0x09, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00
 };
+EXPORT_SYMBOL_GPL(ps3av_mode_cs_info);
 
 #define CS_44	0x00
 #define CS_48	0x02
@@ -677,7 +678,7 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
 			      u32 ch, u32 fs, u32 word_bits, u32 format,
 			      u32 source)
 {
-	int spdif_through, spdif_bitstream;
+	int spdif_through;
 	int i;
 
 	if (!(ch | fs | format | word_bits | source)) {
@@ -687,7 +688,6 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
 		format = PS3AV_CMD_AUDIO_FORMAT_PCM;
 		source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
 	}
-	spdif_through = spdif_bitstream = 0;	/* XXX not supported */
 
 	/* audio mode */
 	memset(audio, 0, sizeof(*audio));
@@ -777,16 +777,17 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
 		break;
 	}
 
+	/* non-audio bit */
+	spdif_through = audio->audio_cs_info[0] & 0x02;
+
 	/* pass through setting */
 	if (spdif_through &&
 	    (avport == PS3AV_CMD_AVPORT_SPDIF_0 ||
-	     avport == PS3AV_CMD_AVPORT_SPDIF_1)) {
+	     avport == PS3AV_CMD_AVPORT_SPDIF_1 ||
+	     avport == PS3AV_CMD_AVPORT_HDMI_0 ||
+	     avport == PS3AV_CMD_AVPORT_HDMI_1)) {
 		audio->audio_word_bits = PS3AV_CMD_AUDIO_WORD_BITS_16;
-		audio->audio_source = PS3AV_CMD_AUDIO_SOURCE_SPDIF;
-		if (spdif_bitstream) {
-			audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
-			audio->audio_cs_info[0] |= CS_BIT;
-		}
+		audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
 	}
 }
 
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index f660ef3e5b2..847481dc8d7 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -246,6 +246,16 @@ config RTC_DRV_TWL92330
 	  platforms.  The support is integrated with the rest of
 	  the Menelaus driver; it's not separate module.
 
+config RTC_DRV_TWL4030
+	tristate "TI TWL4030/TWL5030/TPS659x0"
+	depends on RTC_CLASS && TWL4030_CORE
+	help
+	  If you say yes here you get support for the RTC on the
+	  TWL4030 family chips, used mostly with OMAP3 platforms.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-twl4030.
+
 config RTC_DRV_S35390A
 	tristate "Seiko Instruments S-35390A"
 	select BITREVERSE
@@ -610,6 +620,14 @@ config RTC_DRV_RS5C313
 	help
 	  If you say yes here you get support for the Ricoh RS5C313 RTC chips.
 
+config RTC_DRV_PARISC
+	tristate "PA-RISC firmware RTC support"
+	depends on PARISC
+	help
+	  Say Y or M here to enable RTC support on PA-RISC systems using
+	  firmware calls. If you do not know what you are doing, you should
+	  just say Y.
+
 config RTC_DRV_PPC
        tristate "PowerPC machine dependent RTC support"
        depends on PPC
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index d05928b3ca9..e9e8474cc8f 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
+obj-$(CONFIG_RTC_DRV_PARISC)	+= rtc-parisc.o
 obj-$(CONFIG_RTC_DRV_PPC)	+= rtc-ppc.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
@@ -62,6 +63,7 @@ obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
 obj-$(CONFIG_RTC_DRV_SH)	+= rtc-sh.o
 obj-$(CONFIG_RTC_DRV_STK17TA8)	+= rtc-stk17ta8.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
+obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl4030.o
 obj-$(CONFIG_RTC_DRV_V3020)	+= rtc-v3020.o
 obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 37082616482..b5bf9370691 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -53,21 +53,21 @@ static void at91_rtc_decodetime(unsigned int timereg, unsigned int calreg,
 	} while ((time != at91_sys_read(timereg)) ||
 			(date != at91_sys_read(calreg)));
 
-	tm->tm_sec  = BCD2BIN((time & AT91_RTC_SEC) >> 0);
-	tm->tm_min  = BCD2BIN((time & AT91_RTC_MIN) >> 8);
-	tm->tm_hour = BCD2BIN((time & AT91_RTC_HOUR) >> 16);
+	tm->tm_sec  = bcd2bin((time & AT91_RTC_SEC) >> 0);
+	tm->tm_min  = bcd2bin((time & AT91_RTC_MIN) >> 8);
+	tm->tm_hour = bcd2bin((time & AT91_RTC_HOUR) >> 16);
 
 	/*
 	 * The Calendar Alarm register does not have a field for
 	 * the year - so these will return an invalid value.  When an
 	 * alarm is set, at91_alarm_year wille store the current year.
 	 */
-	tm->tm_year  = BCD2BIN(date & AT91_RTC_CENT) * 100;	/* century */
-	tm->tm_year += BCD2BIN((date & AT91_RTC_YEAR) >> 8);	/* year */
+	tm->tm_year  = bcd2bin(date & AT91_RTC_CENT) * 100;	/* century */
+	tm->tm_year += bcd2bin((date & AT91_RTC_YEAR) >> 8);	/* year */
 
-	tm->tm_wday = BCD2BIN((date & AT91_RTC_DAY) >> 21) - 1;	/* day of the week [0-6], Sunday=0 */
-	tm->tm_mon  = BCD2BIN((date & AT91_RTC_MONTH) >> 16) - 1;
-	tm->tm_mday = BCD2BIN((date & AT91_RTC_DATE) >> 24);
+	tm->tm_wday = bcd2bin((date & AT91_RTC_DAY) >> 21) - 1;	/* day of the week [0-6], Sunday=0 */
+	tm->tm_mon  = bcd2bin((date & AT91_RTC_MONTH) >> 16) - 1;
+	tm->tm_mday = bcd2bin((date & AT91_RTC_DATE) >> 24);
 }
 
 /*
@@ -106,16 +106,16 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
 	at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD);
 
 	at91_sys_write(AT91_RTC_TIMR,
-			  BIN2BCD(tm->tm_sec) << 0
-			| BIN2BCD(tm->tm_min) << 8
-			| BIN2BCD(tm->tm_hour) << 16);
+			  bin2bcd(tm->tm_sec) << 0
+			| bin2bcd(tm->tm_min) << 8
+			| bin2bcd(tm->tm_hour) << 16);
 
 	at91_sys_write(AT91_RTC_CALR,
-			  BIN2BCD((tm->tm_year + 1900) / 100)	/* century */
-			| BIN2BCD(tm->tm_year % 100) << 8	/* year */
-			| BIN2BCD(tm->tm_mon + 1) << 16		/* tm_mon starts at zero */
-			| BIN2BCD(tm->tm_wday + 1) << 21	/* day of the week [0-6], Sunday=0 */
-			| BIN2BCD(tm->tm_mday) << 24);
+			  bin2bcd((tm->tm_year + 1900) / 100)	/* century */
+			| bin2bcd(tm->tm_year % 100) << 8	/* year */
+			| bin2bcd(tm->tm_mon + 1) << 16		/* tm_mon starts at zero */
+			| bin2bcd(tm->tm_wday + 1) << 21	/* day of the week [0-6], Sunday=0 */
+			| bin2bcd(tm->tm_mday) << 24);
 
 	/* Restart Time/Calendar */
 	cr = at91_sys_read(AT91_RTC_CR);
@@ -162,13 +162,13 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
 	at91_sys_write(AT91_RTC_TIMALR,
-		  BIN2BCD(tm.tm_sec) << 0
-		| BIN2BCD(tm.tm_min) << 8
-		| BIN2BCD(tm.tm_hour) << 16
+		  bin2bcd(tm.tm_sec) << 0
+		| bin2bcd(tm.tm_min) << 8
+		| bin2bcd(tm.tm_hour) << 16
 		| AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN);
 	at91_sys_write(AT91_RTC_CALALR,
-		  BIN2BCD(tm.tm_mon + 1) << 16		/* tm_mon starts at zero */
-		| BIN2BCD(tm.tm_mday) << 24
+		  bin2bcd(tm.tm_mon + 1) << 16		/* tm_mon starts at zero */
+		| bin2bcd(tm.tm_mday) << 24
 		| AT91_RTC_DATEEN | AT91_RTC_MTHEN);
 
 	if (alrm->enabled) {
diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c
index 189a018bdf3..d00a274df8f 100644
--- a/drivers/rtc/rtc-bq4802.c
+++ b/drivers/rtc/rtc-bq4802.c
@@ -71,14 +71,14 @@ static int bq4802_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_unlock_irqrestore(&p->lock, flags);
 
-	BCD_TO_BIN(tm->tm_sec);
-	BCD_TO_BIN(tm->tm_min);
-	BCD_TO_BIN(tm->tm_hour);
-	BCD_TO_BIN(tm->tm_mday);
-	BCD_TO_BIN(tm->tm_mon);
-	BCD_TO_BIN(tm->tm_year);
-	BCD_TO_BIN(tm->tm_wday);
-	BCD_TO_BIN(century);
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon);
+	tm->tm_year = bcd2bin(tm->tm_year);
+	tm->tm_wday = bcd2bin(tm->tm_wday);
+	century = bcd2bin(century);
 
 	tm->tm_year += (century * 100);
 	tm->tm_year -= 1900;
@@ -106,13 +106,13 @@ static int bq4802_set_time(struct device *dev, struct rtc_time *tm)
 	min = tm->tm_min;
 	sec = tm->tm_sec;
 
-	BIN_TO_BCD(sec);
-	BIN_TO_BCD(min);
-	BIN_TO_BCD(hrs);
-	BIN_TO_BCD(day);
-	BIN_TO_BCD(mon);
-	BIN_TO_BCD(yrs);
-	BIN_TO_BCD(century);
+	sec = bin2bcd(sec);
+	min = bin2bcd(min);
+	hrs = bin2bcd(hrs);
+	day = bin2bcd(day);
+	mon = bin2bcd(mon);
+	yrs = bin2bcd(yrs);
+	century = bin2bcd(century);
 
 	spin_lock_irqsave(&p->lock, flags);
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 963ad0b6a4e..5549231179a 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -143,6 +143,43 @@ static inline int hpet_unregister_irq_handler(irq_handler_t handler)
 
 /*----------------------------------------------------------------*/
 
+#ifdef RTC_PORT
+
+/* Most newer x86 systems have two register banks, the first used
+ * for RTC and NVRAM and the second only for NVRAM.  Caller must
+ * own rtc_lock ... and we won't worry about access during NMI.
+ */
+#define can_bank2	true
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+	outb(addr, RTC_PORT(2));	/* write the register address first */
+	return inb(RTC_PORT(3));	/* then read the value back */
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+	outb(addr, RTC_PORT(2));	/* write the register address first */
+	outb(val, RTC_PORT(3));		/* data port, as in cmos_read_bank2() */
+}
+
+#else	/* !RTC_PORT */
+
+#define can_bank2	false
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+	return 0;	/* stub: callers test can_bank2 before using bank 2 */
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+}
+
+#endif	/* RTC_PORT */
+
+/*----------------------------------------------------------------*/
+
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
 	/* REVISIT:  if the clock has a "century" register, use
@@ -203,26 +240,26 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* REVISIT this assumes PC style usage:  always BCD */
 
 	if (((unsigned)t->time.tm_sec) < 0x60)
-		t->time.tm_sec = BCD2BIN(t->time.tm_sec);
+		t->time.tm_sec = bcd2bin(t->time.tm_sec);
 	else
 		t->time.tm_sec = -1;
 	if (((unsigned)t->time.tm_min) < 0x60)
-		t->time.tm_min = BCD2BIN(t->time.tm_min);
+		t->time.tm_min = bcd2bin(t->time.tm_min);
 	else
 		t->time.tm_min = -1;
 	if (((unsigned)t->time.tm_hour) < 0x24)
-		t->time.tm_hour = BCD2BIN(t->time.tm_hour);
+		t->time.tm_hour = bcd2bin(t->time.tm_hour);
 	else
 		t->time.tm_hour = -1;
 
 	if (cmos->day_alrm) {
 		if (((unsigned)t->time.tm_mday) <= 0x31)
-			t->time.tm_mday = BCD2BIN(t->time.tm_mday);
+			t->time.tm_mday = bcd2bin(t->time.tm_mday);
 		else
 			t->time.tm_mday = -1;
 		if (cmos->mon_alrm) {
 			if (((unsigned)t->time.tm_mon) <= 0x12)
-				t->time.tm_mon = BCD2BIN(t->time.tm_mon) - 1;
+				t->time.tm_mon = bcd2bin(t->time.tm_mon) - 1;
 			else
 				t->time.tm_mon = -1;
 		}
@@ -294,19 +331,19 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* Writing 0xff means "don't care" or "match all".  */
 
 	mon = t->time.tm_mon + 1;
-	mon = (mon <= 12) ? BIN2BCD(mon) : 0xff;
+	mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
 
 	mday = t->time.tm_mday;
-	mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+	mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 
 	hrs = t->time.tm_hour;
-	hrs = (hrs < 24) ? BIN2BCD(hrs) : 0xff;
+	hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
 
 	min = t->time.tm_min;
-	min = (min < 60) ? BIN2BCD(min) : 0xff;
+	min = (min < 60) ? bin2bcd(min) : 0xff;
 
 	sec = t->time.tm_sec;
-	sec = (sec < 60) ? BIN2BCD(sec) : 0xff;
+	sec = (sec < 60) ? bin2bcd(sec) : 0xff;
 
 	spin_lock_irq(&rtc_lock);
 
@@ -491,12 +528,21 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 
 	if (unlikely(off >= attr->size))
 		return 0;
+	if (unlikely(off < 0))
+		return -EINVAL;
 	if ((off + count) > attr->size)
 		count = attr->size - off;
 
+	off += NVRAM_OFFSET;
 	spin_lock_irq(&rtc_lock);
-	for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++)
-		*buf++ = CMOS_READ(off);
+	for (retval = 0; count; count--, off++, retval++) {
+		if (off < 128)
+			*buf++ = CMOS_READ(off);
+		else if (can_bank2)
+			*buf++ = cmos_read_bank2(off);
+		else
+			break;
+	}
 	spin_unlock_irq(&rtc_lock);
 
 	return retval;
@@ -512,6 +558,8 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 	cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
 	if (unlikely(off >= attr->size))
 		return -EFBIG;
+	if (unlikely(off < 0))
+		return -EINVAL;
 	if ((off + count) > attr->size)
 		count = attr->size - off;
 
@@ -520,15 +568,20 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 	 * here.  If userspace is smart enough to know what fields of
 	 * NVRAM to update, updating checksums is also part of its job.
 	 */
+	off += NVRAM_OFFSET;
 	spin_lock_irq(&rtc_lock);
-	for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) {
+	for (retval = 0; count; count--, off++, retval++) {
 		/* don't trash RTC registers */
 		if (off == cmos->day_alrm
 				|| off == cmos->mon_alrm
 				|| off == cmos->century)
 			buf++;
-		else
+		else if (off < 128)
 			CMOS_WRITE(*buf++, off);
+		else if (can_bank2)
+			cmos_write_bank2(*buf++, off);
+		else
+			break;
 	}
 	spin_unlock_irq(&rtc_lock);
 
@@ -539,7 +592,6 @@ static struct bin_attribute nvram = {
 	.attr = {
 		.name	= "nvram",
 		.mode	= S_IRUGO | S_IWUSR,
-		.owner	= THIS_MODULE,
 	},
 
 	.read	= cmos_nvram_read,
@@ -631,8 +683,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
 	/* Heuristic to deduce NVRAM size ... do what the legacy NVRAM
 	 * driver did, but don't reject unknown configs.   Old hardware
-	 * won't address 128 bytes, and for now we ignore the way newer
-	 * chips can address 256 bytes (using two more i/o ports).
+	 * won't address 128 bytes.  Newer chips have multiple banks,
+	 * though they may not be listed in one I/O resource.
 	 */
 #if	defined(CONFIG_ATARI)
 	address_space = 64;
@@ -642,6 +694,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 #warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
 	address_space = 128;
 #endif
+	if (can_bank2 && ports->end > (ports->start + 1))
+		address_space = 256;
 
 	/* For ACPI systems extension info comes from the FADT.  On others,
 	 * board specific setup provides it as appropriate.  Systems where
@@ -740,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		goto cleanup2;
 	}
 
-	pr_info("%s: alarms up to one %s%s%s\n",
+	pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n",
 			cmos_rtc.rtc->dev.bus_id,
 			is_valid_irq(rtc_irq)
 				?  (cmos_rtc.mon_alrm
@@ -749,6 +803,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 						? "month" : "day"))
 				: "no",
 			cmos_rtc.century ? ", y3k" : "",
+			nvram.size,
 			is_hpet_enabled() ? ", hpet irqs" : "");
 
 	return 0;
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 0b17770b032..9a234a4ec06 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -86,19 +86,19 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	ds1216_switch_ds_to_clock(priv->ioaddr);
 	ds1216_read(priv->ioaddr, (u8 *)&regs);
 
-	tm->tm_sec = BCD2BIN(regs.sec);
-	tm->tm_min = BCD2BIN(regs.min);
+	tm->tm_sec = bcd2bin(regs.sec);
+	tm->tm_min = bcd2bin(regs.min);
 	if (regs.hour & DS1216_HOUR_1224) {
 		/* AM/PM mode */
-		tm->tm_hour = BCD2BIN(regs.hour & 0x1f);
+		tm->tm_hour = bcd2bin(regs.hour & 0x1f);
 		if (regs.hour & DS1216_HOUR_AMPM)
 			tm->tm_hour += 12;
 	} else
-		tm->tm_hour = BCD2BIN(regs.hour & 0x3f);
+		tm->tm_hour = bcd2bin(regs.hour & 0x3f);
 	tm->tm_wday = (regs.wday & 7) - 1;
-	tm->tm_mday = BCD2BIN(regs.mday & 0x3f);
-	tm->tm_mon = BCD2BIN(regs.month & 0x1f);
-	tm->tm_year = BCD2BIN(regs.year);
+	tm->tm_mday = bcd2bin(regs.mday & 0x3f);
+	tm->tm_mon = bcd2bin(regs.month & 0x1f);
+	tm->tm_year = bcd2bin(regs.year);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 	return 0;
@@ -114,19 +114,19 @@ static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	ds1216_read(priv->ioaddr, (u8 *)&regs);
 
 	regs.tsec = 0; /* clear 0.1 and 0.01 seconds */
-	regs.sec = BIN2BCD(tm->tm_sec);
-	regs.min = BIN2BCD(tm->tm_min);
+	regs.sec = bin2bcd(tm->tm_sec);
+	regs.min = bin2bcd(tm->tm_min);
 	regs.hour &= DS1216_HOUR_1224;
 	if (regs.hour && tm->tm_hour > 12) {
 		regs.hour |= DS1216_HOUR_AMPM;
 		tm->tm_hour -= 12;
 	}
-	regs.hour |= BIN2BCD(tm->tm_hour);
+	regs.hour |= bin2bcd(tm->tm_hour);
 	regs.wday &= ~7;
 	regs.wday |= tm->tm_wday;
-	regs.mday = BIN2BCD(tm->tm_mday);
-	regs.month = BIN2BCD(tm->tm_mon);
-	regs.year = BIN2BCD(tm->tm_year % 100);
+	regs.mday = bin2bcd(tm->tm_mday);
+	regs.month = bin2bcd(tm->tm_mon);
+	regs.year = bin2bcd(tm->tm_year % 100);
 
 	ds1216_switch_ds_to_clock(priv->ioaddr);
 	ds1216_write(priv->ioaddr, (u8 *)&regs);
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index b9397818f73..18455662077 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -40,7 +40,7 @@
 #define	RTC_SCLK	0x0400
 
 #ifdef CONFIG_SH_SECUREEDGE5410
-#include <asm/snapgear.h>
+#include <mach/snapgear.h>
 #define set_dp(x)	SECUREEDGE_WRITE_IOPORT(x, 0x1c00)
 #define get_dp()	SECUREEDGE_READ_IOPORT()
 #else
@@ -107,13 +107,13 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_lock_irq(&rtc->lock);
 
-	tm->tm_sec	= BCD2BIN(ds1302_readbyte(RTC_ADDR_SEC));
-	tm->tm_min	= BCD2BIN(ds1302_readbyte(RTC_ADDR_MIN));
-	tm->tm_hour	= BCD2BIN(ds1302_readbyte(RTC_ADDR_HOUR));
-	tm->tm_wday	= BCD2BIN(ds1302_readbyte(RTC_ADDR_DAY));
-	tm->tm_mday	= BCD2BIN(ds1302_readbyte(RTC_ADDR_DATE));
-	tm->tm_mon	= BCD2BIN(ds1302_readbyte(RTC_ADDR_MON)) - 1;
-	tm->tm_year	= BCD2BIN(ds1302_readbyte(RTC_ADDR_YEAR));
+	tm->tm_sec	= bcd2bin(ds1302_readbyte(RTC_ADDR_SEC));
+	tm->tm_min	= bcd2bin(ds1302_readbyte(RTC_ADDR_MIN));
+	tm->tm_hour	= bcd2bin(ds1302_readbyte(RTC_ADDR_HOUR));
+	tm->tm_wday	= bcd2bin(ds1302_readbyte(RTC_ADDR_DAY));
+	tm->tm_mday	= bcd2bin(ds1302_readbyte(RTC_ADDR_DATE));
+	tm->tm_mon	= bcd2bin(ds1302_readbyte(RTC_ADDR_MON)) - 1;
+	tm->tm_year	= bcd2bin(ds1302_readbyte(RTC_ADDR_YEAR));
 
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
@@ -141,13 +141,13 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	/* Stop RTC */
 	ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80);
 
-	ds1302_writebyte(RTC_ADDR_SEC, BIN2BCD(tm->tm_sec));
-	ds1302_writebyte(RTC_ADDR_MIN, BIN2BCD(tm->tm_min));
-	ds1302_writebyte(RTC_ADDR_HOUR, BIN2BCD(tm->tm_hour));
-	ds1302_writebyte(RTC_ADDR_DAY, BIN2BCD(tm->tm_wday));
-	ds1302_writebyte(RTC_ADDR_DATE, BIN2BCD(tm->tm_mday));
-	ds1302_writebyte(RTC_ADDR_MON, BIN2BCD(tm->tm_mon + 1));
-	ds1302_writebyte(RTC_ADDR_YEAR, BIN2BCD(tm->tm_year % 100));
+	ds1302_writebyte(RTC_ADDR_SEC, bin2bcd(tm->tm_sec));
+	ds1302_writebyte(RTC_ADDR_MIN, bin2bcd(tm->tm_min));
+	ds1302_writebyte(RTC_ADDR_HOUR, bin2bcd(tm->tm_hour));
+	ds1302_writebyte(RTC_ADDR_DAY, bin2bcd(tm->tm_wday));
+	ds1302_writebyte(RTC_ADDR_DATE, bin2bcd(tm->tm_mday));
+	ds1302_writebyte(RTC_ADDR_MON, bin2bcd(tm->tm_mon + 1));
+	ds1302_writebyte(RTC_ADDR_YEAR, bin2bcd(tm->tm_year % 100));
 
 	/* Start RTC */
 	ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80);
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index b91d02a3ace..fc372df6534 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -114,10 +114,10 @@ static unsigned bcd2hour(u8 bcd)
 			hour = 12;
 			bcd &= ~DS1305_HR_PM;
 		}
-		hour += BCD2BIN(bcd);
+		hour += bcd2bin(bcd);
 		return hour - 1;
 	}
-	return BCD2BIN(bcd);
+	return bcd2bin(bcd);
 }
 
 static u8 hour2bcd(bool hr12, int hour)
@@ -125,11 +125,11 @@ static u8 hour2bcd(bool hr12, int hour)
 	if (hr12) {
 		hour++;
 		if (hour <= 12)
-			return DS1305_HR_12 | BIN2BCD(hour);
+			return DS1305_HR_12 | bin2bcd(hour);
 		hour -= 12;
-		return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour);
+		return DS1305_HR_12 | DS1305_HR_PM | bin2bcd(hour);
 	}
-	return BIN2BCD(hour);
+	return bin2bcd(hour);
 }
 
 /*----------------------------------------------------------------------*/
@@ -206,13 +206,13 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
 		buf[4], buf[5], buf[6]);
 
 	/* Decode the registers */
-	time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
-	time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+	time->tm_sec = bcd2bin(buf[DS1305_SEC]);
+	time->tm_min = bcd2bin(buf[DS1305_MIN]);
 	time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
 	time->tm_wday = buf[DS1305_WDAY] - 1;
-	time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
-	time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
-	time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+	time->tm_mday = bcd2bin(buf[DS1305_MDAY]);
+	time->tm_mon = bcd2bin(buf[DS1305_MON]) - 1;
+	time->tm_year = bcd2bin(buf[DS1305_YEAR]) + 100;
 
 	dev_vdbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -239,13 +239,13 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time)
 	/* Write registers starting at the first time/date address. */
 	*bp++ = DS1305_WRITE | DS1305_SEC;
 
-	*bp++ = BIN2BCD(time->tm_sec);
-	*bp++ = BIN2BCD(time->tm_min);
+	*bp++ = bin2bcd(time->tm_sec);
+	*bp++ = bin2bcd(time->tm_min);
 	*bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
 	*bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
-	*bp++ = BIN2BCD(time->tm_mday);
-	*bp++ = BIN2BCD(time->tm_mon + 1);
-	*bp++ = BIN2BCD(time->tm_year - 100);
+	*bp++ = bin2bcd(time->tm_mday);
+	*bp++ = bin2bcd(time->tm_mon + 1);
+	*bp++ = bin2bcd(time->tm_year - 100);
 
 	dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
 		"write", buf[1], buf[2], buf[3],
@@ -329,8 +329,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	 * fill in the rest ... and also handle rollover to tomorrow when
 	 * that's needed.
 	 */
-	alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
-	alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+	alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
+	alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
 	alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
 	alm->time.tm_mday = -1;
 	alm->time.tm_mon = -1;
@@ -387,8 +387,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 
 	/* write alarm */
 	buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
-	buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
-	buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+	buf[1 + DS1305_SEC] = bin2bcd(alm->time.tm_sec);
+	buf[1 + DS1305_MIN] = bin2bcd(alm->time.tm_min);
 	buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
 	buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
 
@@ -606,7 +606,6 @@ ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 static struct bin_attribute nvram = {
 	.attr.name	= "nvram",
 	.attr.mode	= S_IRUGO | S_IWUSR,
-	.attr.owner	= THIS_MODULE,
 	.read		= ds1305_nvram_read,
 	.write		= ds1305_nvram_write,
 	.size		= DS1305_NVRAM_LEN,
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4fcf0734a6e..162330b9d1d 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -222,17 +222,17 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 			ds1307->regs[4], ds1307->regs[5],
 			ds1307->regs[6]);
 
-	t->tm_sec = BCD2BIN(ds1307->regs[DS1307_REG_SECS] & 0x7f);
-	t->tm_min = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+	t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
+	t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
-	t->tm_hour = BCD2BIN(tmp);
-	t->tm_wday = BCD2BIN(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
-	t->tm_mday = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+	t->tm_hour = bcd2bin(tmp);
+	t->tm_wday = bcd2bin(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
+	t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
 	tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
-	t->tm_mon = BCD2BIN(tmp) - 1;
+	t->tm_mon = bcd2bin(tmp) - 1;
 
 	/* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
-	t->tm_year = BCD2BIN(ds1307->regs[DS1307_REG_YEAR]) + 100;
+	t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -258,16 +258,16 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_mon, t->tm_year, t->tm_wday);
 
 	*buf++ = 0;		/* first register addr */
-	buf[DS1307_REG_SECS] = BIN2BCD(t->tm_sec);
-	buf[DS1307_REG_MIN] = BIN2BCD(t->tm_min);
-	buf[DS1307_REG_HOUR] = BIN2BCD(t->tm_hour);
-	buf[DS1307_REG_WDAY] = BIN2BCD(t->tm_wday + 1);
-	buf[DS1307_REG_MDAY] = BIN2BCD(t->tm_mday);
-	buf[DS1307_REG_MONTH] = BIN2BCD(t->tm_mon + 1);
+	buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
+	buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
+	buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
+	buf[DS1307_REG_WDAY] = bin2bcd(t->tm_wday + 1);
+	buf[DS1307_REG_MDAY] = bin2bcd(t->tm_mday);
+	buf[DS1307_REG_MONTH] = bin2bcd(t->tm_mon + 1);
 
 	/* assume 20YY not 19YY */
 	tmp = t->tm_year - 100;
-	buf[DS1307_REG_YEAR] = BIN2BCD(tmp);
+	buf[DS1307_REG_YEAR] = bin2bcd(tmp);
 
 	switch (ds1307->type) {
 	case ds_1337:
@@ -551,7 +551,6 @@ static struct bin_attribute nvram = {
 	.attr = {
 		.name	= "nvram",
 		.mode	= S_IRUGO | S_IWUSR,
-		.owner	= THIS_MODULE,
 	},
 
 	.read	= ds1307_nvram_read,
@@ -709,18 +708,18 @@ read_rtc:
 	}
 
 	tmp = ds1307->regs[DS1307_REG_SECS];
-	tmp = BCD2BIN(tmp & 0x7f);
+	tmp = bcd2bin(tmp & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
 
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
 	if (tmp == 0 || tmp > 31)
 		goto exit_bad;
 
-	tmp = BCD2BIN(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
+	tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
 	if (tmp == 0 || tmp > 12)
 		goto exit_bad;
 
@@ -739,14 +738,14 @@ read_rtc:
 		/* Be sure we're in 24 hour mode.  Multi-master systems
 		 * take note...
 		 */
-		tmp = BCD2BIN(tmp & 0x1f);
+		tmp = bcd2bin(tmp & 0x1f);
 		if (tmp == 12)
 			tmp = 0;
 		if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 			tmp += 12;
 		i2c_smbus_write_byte_data(client,
 				DS1307_REG_HOUR,
-				BIN2BCD(tmp));
+				bin2bcd(tmp));
 	}
 
 	ds1307->rtc = rtc_device_register(client->name, &client->dev,
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 86981d34fbb..25caada7839 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -153,8 +153,8 @@ ds1511_wdog_set(unsigned long deciseconds)
 	/*
 	 * set the wdog values in the wdog registers
 	 */
-	rtc_write(BIN2BCD(deciseconds % 100), DS1511_WD_MSEC);
-	rtc_write(BIN2BCD(deciseconds / 100), DS1511_WD_SEC);
+	rtc_write(bin2bcd(deciseconds % 100), DS1511_WD_MSEC);
+	rtc_write(bin2bcd(deciseconds / 100), DS1511_WD_SEC);
 	/*
 	 * set wdog enable and wdog 'steering' bit to issue a reset
 	 */
@@ -220,13 +220,13 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
 	/*
 	 * each register is a different number of valid bits
 	 */
-	sec = BIN2BCD(sec) & 0x7f;
-	min = BIN2BCD(min) & 0x7f;
-	hrs = BIN2BCD(hrs) & 0x3f;
-	day = BIN2BCD(day) & 0x3f;
-	mon = BIN2BCD(mon) & 0x1f;
-	yrs = BIN2BCD(yrs) & 0xff;
-	cen = BIN2BCD(cen) & 0xff;
+	sec = bin2bcd(sec) & 0x7f;
+	min = bin2bcd(min) & 0x7f;
+	hrs = bin2bcd(hrs) & 0x3f;
+	day = bin2bcd(day) & 0x3f;
+	mon = bin2bcd(mon) & 0x1f;
+	yrs = bin2bcd(yrs) & 0xff;
+	cen = bin2bcd(cen) & 0xff;
 
 	spin_lock_irqsave(&ds1511_lock, flags);
 	rtc_disable_update();
@@ -264,14 +264,14 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_enable_update();
 	spin_unlock_irqrestore(&ds1511_lock, flags);
 
-	rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-	rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-	rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-	rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-	rtc_tm->tm_wday = BCD2BIN(rtc_tm->tm_wday);
-	rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-	rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
-	century = BCD2BIN(century) * 100;
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+	century = bcd2bin(century) * 100;
 
 	/*
 	 * Account for differences between how the RTC uses the values
@@ -304,16 +304,16 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata)
 
 	spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
 	rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday) & 0x3f,
+	       0x80 : bin2bcd(pdata->alrm_mday) & 0x3f,
 	       RTC_ALARM_DATE);
 	rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour) & 0x3f,
+	       0x80 : bin2bcd(pdata->alrm_hour) & 0x3f,
 	       RTC_ALARM_HOUR);
 	rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min) & 0x7f,
+	       0x80 : bin2bcd(pdata->alrm_min) & 0x7f,
 	       RTC_ALARM_MIN);
 	rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec) & 0x7f,
+	       0x80 : bin2bcd(pdata->alrm_sec) & 0x7f,
 	       RTC_ALARM_SEC);
 	rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD);
 	rtc_read(RTC_CMD1);	/* clear interrupts */
@@ -481,7 +481,6 @@ static struct bin_attribute ds1511_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUGO,
-		.owner = THIS_MODULE,
 	},
 	.size = DS1511_RAM_MAX,
 	.read = ds1511_nvram_read,
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 4ef59285b48..b9475cd2021 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -78,17 +78,17 @@ static int ds1553_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	void __iomem *ioaddr = pdata->ioaddr;
 	u8 century;
 
-	century = BIN2BCD((tm->tm_year + 1900) / 100);
+	century = bin2bcd((tm->tm_year + 1900) / 100);
 
 	writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
 	/* RTC_CENTURY and RTC_CONTROL share same register */
 	writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -118,14 +118,14 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
 	writeb(0, ioaddr + RTC_CONTROL);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
@@ -141,16 +141,16 @@ static void ds1553_rtc_update_alarm(struct rtc_plat_data *pdata)
 
 	spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
 	writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday),
+	       0x80 : bin2bcd(pdata->alrm_mday),
 	       ioaddr + RTC_DATE_ALARM);
 	writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour),
+	       0x80 : bin2bcd(pdata->alrm_hour),
 	       ioaddr + RTC_HOURS_ALARM);
 	writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min),
+	       0x80 : bin2bcd(pdata->alrm_min),
 	       ioaddr + RTC_MINUTES_ALARM);
 	writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec),
+	       0x80 : bin2bcd(pdata->alrm_sec),
 	       ioaddr + RTC_SECONDS_ALARM);
 	writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS);
 	readb(ioaddr + RTC_FLAGS);	/* clear interrupts */
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 24d35ede2db..8bc8501bffc 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -66,17 +66,17 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	void __iomem *ioaddr = pdata->ioaddr_rtc;
 	u8 century;
 
-	century = BIN2BCD((tm->tm_year + 1900) / 100);
+	century = bin2bcd((tm->tm_year + 1900) / 100);
 
 	writeb(RTC_WRITE, ioaddr + RTC_CONTROL);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
 	/* RTC_CENTURY and RTC_CONTROL share same register */
 	writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -106,14 +106,14 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
 	writeb(0, ioaddr + RTC_CONTROL);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index abfdfcbaa05..3a7be11cc6b 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -131,17 +131,17 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
 			fm3130->regs[0xc], fm3130->regs[0xd],
 			fm3130->regs[0xe]);
 
-	t->tm_sec = BCD2BIN(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
-	t->tm_min = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	t->tm_sec = bcd2bin(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
+	t->tm_min = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	tmp = fm3130->regs[FM3130_RTC_HOURS] & 0x3f;
-	t->tm_hour = BCD2BIN(tmp);
-	t->tm_wday = BCD2BIN(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
-	t->tm_mday = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	t->tm_hour = bcd2bin(tmp);
+	t->tm_wday = bcd2bin(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
+	t->tm_mday = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	tmp = fm3130->regs[FM3130_RTC_MONTHS] & 0x1f;
-	t->tm_mon = BCD2BIN(tmp) - 1;
+	t->tm_mon = bcd2bin(tmp) - 1;
 
 	/* assume 20YY not 19YY, and ignore CF bit */
-	t->tm_year = BCD2BIN(fm3130->regs[FM3130_RTC_YEARS]) + 100;
+	t->tm_year = bcd2bin(fm3130->regs[FM3130_RTC_YEARS]) + 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -167,16 +167,16 @@ static int fm3130_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_mon, t->tm_year, t->tm_wday);
 
 	/* first register addr */
-	buf[FM3130_RTC_SECONDS] = BIN2BCD(t->tm_sec);
-	buf[FM3130_RTC_MINUTES] = BIN2BCD(t->tm_min);
-	buf[FM3130_RTC_HOURS] = BIN2BCD(t->tm_hour);
-	buf[FM3130_RTC_DAY] = BIN2BCD(t->tm_wday + 1);
-	buf[FM3130_RTC_DATE] = BIN2BCD(t->tm_mday);
-	buf[FM3130_RTC_MONTHS] = BIN2BCD(t->tm_mon + 1);
+	buf[FM3130_RTC_SECONDS] = bin2bcd(t->tm_sec);
+	buf[FM3130_RTC_MINUTES] = bin2bcd(t->tm_min);
+	buf[FM3130_RTC_HOURS] = bin2bcd(t->tm_hour);
+	buf[FM3130_RTC_DAY] = bin2bcd(t->tm_wday + 1);
+	buf[FM3130_RTC_DATE] = bin2bcd(t->tm_mday);
+	buf[FM3130_RTC_MONTHS] = bin2bcd(t->tm_mon + 1);
 
 	/* assume 20YY not 19YY */
 	tmp = t->tm_year - 100;
-	buf[FM3130_RTC_YEARS] = BIN2BCD(tmp);
+	buf[FM3130_RTC_YEARS] = bin2bcd(tmp);
 
 	dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x"
 		"%02x %02x %02x %02x %02x %02x %02x %02x\n",
@@ -222,11 +222,11 @@ static int fm3130_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 			fm3130->regs[FM3130_ALARM_MONTHS]);
 
 
-	tm->tm_sec	= BCD2BIN(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
-	tm->tm_min	= BCD2BIN(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
-	tm->tm_hour	= BCD2BIN(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
-	tm->tm_mday	= BCD2BIN(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
-	tm->tm_mon	= BCD2BIN(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
+	tm->tm_sec	= bcd2bin(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
+	tm->tm_min	= bcd2bin(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
+	tm->tm_hour	= bcd2bin(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
+	tm->tm_mday	= bcd2bin(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
+	tm->tm_mon	= bcd2bin(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
 	if (tm->tm_mon > 0)
 		tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
@@ -252,23 +252,23 @@ static int fm3130_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	if (tm->tm_sec != -1)
 		fm3130->regs[FM3130_ALARM_SECONDS] =
-			BIN2BCD(tm->tm_sec) | 0x80;
+			bin2bcd(tm->tm_sec) | 0x80;
 
 	if (tm->tm_min != -1)
 		fm3130->regs[FM3130_ALARM_MINUTES] =
-			BIN2BCD(tm->tm_min) | 0x80;
+			bin2bcd(tm->tm_min) | 0x80;
 
 	if (tm->tm_hour != -1)
 		fm3130->regs[FM3130_ALARM_HOURS] =
-			BIN2BCD(tm->tm_hour) | 0x80;
+			bin2bcd(tm->tm_hour) | 0x80;
 
 	if (tm->tm_mday != -1)
 		fm3130->regs[FM3130_ALARM_DATE] =
-			BIN2BCD(tm->tm_mday) | 0x80;
+			bin2bcd(tm->tm_mday) | 0x80;
 
 	if (tm->tm_mon != -1)
 		fm3130->regs[FM3130_ALARM_MONTHS] =
-			BIN2BCD(tm->tm_mon + 1) | 0x80;
+			bin2bcd(tm->tm_mon + 1) | 0x80;
 
 	dev_dbg(dev, "alarm write %02x %02x %02x %02x %02x\n",
 			fm3130->regs[FM3130_ALARM_SECONDS],
@@ -414,18 +414,18 @@ static int __devinit fm3130_probe(struct i2c_client *client,
 	/* TODO */
 	/* TODO need to sanity check alarm */
 	tmp = fm3130->regs[FM3130_RTC_SECONDS];
-	tmp = BCD2BIN(tmp & 0x7f);
+	tmp = bcd2bin(tmp & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	if (tmp == 0 || tmp > 31)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
 	if (tmp == 0 || tmp > 12)
 		goto exit_bad;
 
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index a81adab6e51..2cd77ab8fc6 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -259,26 +259,26 @@ isl1208_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 		return sr;
 	}
 
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MN]);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SC]);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MN]);
 
 	/* HR field has a more complex interpretation */
 	{
 		const u8 _hr = regs[ISL1208_REG_HR];
 		if (_hr & ISL1208_REG_HR_MIL)	/* 24h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x3f);
+			tm->tm_hour = bcd2bin(_hr & 0x3f);
 		else {
 			/* 12h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x1f);
+			tm->tm_hour = bcd2bin(_hr & 0x1f);
 			if (_hr & ISL1208_REG_HR_PM)	/* PM flag set */
 				tm->tm_hour += 12;
 		}
 	}
 
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[ISL1208_REG_MO]) - 1;	/* rtc starts at 1 */
-	tm->tm_year = BCD2BIN(regs[ISL1208_REG_YR]) + 100;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DW]);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[ISL1208_REG_MO]) - 1;	/* rtc starts at 1 */
+	tm->tm_year = bcd2bin(regs[ISL1208_REG_YR]) + 100;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DW]);
 
 	return 0;
 }
@@ -305,13 +305,13 @@ isl1208_i2c_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alarm)
 	}
 
 	/* MSB of each alarm register is an enable bit */
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_hour = BCD2BIN(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_hour = bcd2bin(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
 	tm->tm_mon =
-		BCD2BIN(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
+		bcd2bin(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
 
 	return 0;
 }
@@ -328,15 +328,15 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	int sr;
 	u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
 
-	regs[ISL1208_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[ISL1208_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[ISL1208_REG_HR] = BIN2BCD(tm->tm_hour) | ISL1208_REG_HR_MIL;
+	regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
+	regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
 
-	regs[ISL1208_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[ISL1208_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[ISL1208_REG_YR] = BIN2BCD(tm->tm_year - 100);
+	regs[ISL1208_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[ISL1208_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[ISL1208_REG_YR] = bin2bcd(tm->tm_year - 100);
 
-	regs[ISL1208_REG_DW] = BIN2BCD(tm->tm_wday & 7);
+	regs[ISL1208_REG_DW] = bin2bcd(tm->tm_wday & 7);
 
 	sr = isl1208_i2c_get_sr(client);
 	if (sr < 0) {
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 470fb2d2954..893f7dece23 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -110,15 +110,15 @@ static int m41t80_get_datetime(struct i2c_client *client,
 		return -EIO;
 	}
 
-	tm->tm_sec = BCD2BIN(buf[M41T80_REG_SEC] & 0x7f);
-	tm->tm_min = BCD2BIN(buf[M41T80_REG_MIN] & 0x7f);
-	tm->tm_hour = BCD2BIN(buf[M41T80_REG_HOUR] & 0x3f);
-	tm->tm_mday = BCD2BIN(buf[M41T80_REG_DAY] & 0x3f);
+	tm->tm_sec = bcd2bin(buf[M41T80_REG_SEC] & 0x7f);
+	tm->tm_min = bcd2bin(buf[M41T80_REG_MIN] & 0x7f);
+	tm->tm_hour = bcd2bin(buf[M41T80_REG_HOUR] & 0x3f);
+	tm->tm_mday = bcd2bin(buf[M41T80_REG_DAY] & 0x3f);
 	tm->tm_wday = buf[M41T80_REG_WDAY] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[M41T80_REG_MON] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(buf[M41T80_REG_MON] & 0x1f) - 1;
 
 	/* assume 20YY not 19YY, and ignore the Century Bit */
-	tm->tm_year = BCD2BIN(buf[M41T80_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
 	return 0;
 }
 
@@ -161,19 +161,19 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	/* Merge time-data and register flags into buf[0..7] */
 	buf[M41T80_REG_SSEC] = 0;
 	buf[M41T80_REG_SEC] =
-		BIN2BCD(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
+		bin2bcd(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
 	buf[M41T80_REG_MIN] =
-		BIN2BCD(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
+		bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
 	buf[M41T80_REG_HOUR] =
-		BIN2BCD(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
+		bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
 	buf[M41T80_REG_WDAY] =
 		(tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07);
 	buf[M41T80_REG_DAY] =
-		BIN2BCD(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
+		bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
 	buf[M41T80_REG_MON] =
-		BIN2BCD(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+		bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
 	/* assume 20YY not 19YY */
-	buf[M41T80_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	if (i2c_transfer(client->adapter, msgs, 1) != 1) {
 		dev_err(&client->dev, "write error\n");
@@ -288,15 +288,15 @@ static int m41t80_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	wbuf[0] = M41T80_REG_ALARM_MON; /* offset into rtc's regs */
 	reg[M41T80_REG_ALARM_SEC] |= t->time.tm_sec >= 0 ?
-		BIN2BCD(t->time.tm_sec) : 0x80;
+		bin2bcd(t->time.tm_sec) : 0x80;
 	reg[M41T80_REG_ALARM_MIN] |= t->time.tm_min >= 0 ?
-		BIN2BCD(t->time.tm_min) : 0x80;
+		bin2bcd(t->time.tm_min) : 0x80;
 	reg[M41T80_REG_ALARM_HOUR] |= t->time.tm_hour >= 0 ?
-		BIN2BCD(t->time.tm_hour) : 0x80;
+		bin2bcd(t->time.tm_hour) : 0x80;
 	reg[M41T80_REG_ALARM_DAY] |= t->time.tm_mday >= 0 ?
-		BIN2BCD(t->time.tm_mday) : 0x80;
+		bin2bcd(t->time.tm_mday) : 0x80;
 	if (t->time.tm_mon >= 0)
-		reg[M41T80_REG_ALARM_MON] |= BIN2BCD(t->time.tm_mon + 1);
+		reg[M41T80_REG_ALARM_MON] |= bin2bcd(t->time.tm_mon + 1);
 	else
 		reg[M41T80_REG_ALARM_DAY] |= 0x40;
 
@@ -347,15 +347,15 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
 	if (!(reg[M41T80_REG_ALARM_SEC] & 0x80))
-		t->time.tm_sec = BCD2BIN(reg[M41T80_REG_ALARM_SEC] & 0x7f);
+		t->time.tm_sec = bcd2bin(reg[M41T80_REG_ALARM_SEC] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_MIN] & 0x80))
-		t->time.tm_min = BCD2BIN(reg[M41T80_REG_ALARM_MIN] & 0x7f);
+		t->time.tm_min = bcd2bin(reg[M41T80_REG_ALARM_MIN] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_HOUR] & 0x80))
-		t->time.tm_hour = BCD2BIN(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
+		t->time.tm_hour = bcd2bin(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x80))
-		t->time.tm_mday = BCD2BIN(reg[M41T80_REG_ALARM_DAY] & 0x3f);
+		t->time.tm_mday = bcd2bin(reg[M41T80_REG_ALARM_DAY] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x40))
-		t->time.tm_mon = BCD2BIN(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
+		t->time.tm_mon = bcd2bin(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
 	t->time.tm_year = -1;
 	t->time.tm_wday = -1;
 	t->time.tm_yday = -1;
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 9b19499c829..c3a18c58daf 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -41,17 +41,17 @@ static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
-	buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
-	buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
-	buf[M41T94_REG_HOURS]   = BIN2BCD(tm->tm_hour);
-	buf[M41T94_REG_WDAY]    = BIN2BCD(tm->tm_wday + 1);
-	buf[M41T94_REG_DAY]     = BIN2BCD(tm->tm_mday);
-	buf[M41T94_REG_MONTH]   = BIN2BCD(tm->tm_mon + 1);
+	buf[M41T94_REG_SECONDS] = bin2bcd(tm->tm_sec);
+	buf[M41T94_REG_MINUTES] = bin2bcd(tm->tm_min);
+	buf[M41T94_REG_HOURS]   = bin2bcd(tm->tm_hour);
+	buf[M41T94_REG_WDAY]    = bin2bcd(tm->tm_wday + 1);
+	buf[M41T94_REG_DAY]     = bin2bcd(tm->tm_mday);
+	buf[M41T94_REG_MONTH]   = bin2bcd(tm->tm_mon + 1);
 
 	buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
 	if (tm->tm_year >= 100)
 		buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
-	buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T94_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	return spi_write(spi, buf, 8);
 }
@@ -82,14 +82,14 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
 		spi_write(spi, buf, 2);
 	}
 
-	tm->tm_sec  = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
-	tm->tm_min  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+	tm->tm_sec  = bcd2bin(spi_w8r8(spi, M41T94_REG_SECONDS));
+	tm->tm_min  = bcd2bin(spi_w8r8(spi, M41T94_REG_MINUTES));
 	hour = spi_w8r8(spi, M41T94_REG_HOURS);
-	tm->tm_hour = BCD2BIN(hour & 0x3f);
-	tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
-	tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
-	tm->tm_mon  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
-	tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+	tm->tm_hour = bcd2bin(hour & 0x3f);
+	tm->tm_wday = bcd2bin(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+	tm->tm_mday = bcd2bin(spi_w8r8(spi, M41T94_REG_DAY));
+	tm->tm_mon  = bcd2bin(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+	tm->tm_year = bcd2bin(spi_w8r8(spi, M41T94_REG_YEAR));
 	if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
 		tm->tm_year += 100;
 
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index ce4eff6a8d5..04b63dab693 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -76,10 +76,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year	= BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year	= bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon	= BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
-	tm->tm_mday	= BCD2BIN(M48T59_READ(M48T59_MDAY));
+	tm->tm_mon	= bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mday	= bcd2bin(M48T59_READ(M48T59_MDAY));
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((pdata->type == M48T59RTC_TYPE_M48T59) &&
@@ -88,10 +88,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year += 100;	/* one century */
 	}
 
-	tm->tm_wday	= BCD2BIN(val & 0x07);
-	tm->tm_hour	= BCD2BIN(M48T59_READ(M48T59_HOUR) & 0x3F);
-	tm->tm_min	= BCD2BIN(M48T59_READ(M48T59_MIN) & 0x7F);
-	tm->tm_sec	= BCD2BIN(M48T59_READ(M48T59_SEC) & 0x7F);
+	tm->tm_wday	= bcd2bin(val & 0x07);
+	tm->tm_hour	= bcd2bin(M48T59_READ(M48T59_HOUR) & 0x3F);
+	tm->tm_min	= bcd2bin(M48T59_READ(M48T59_MIN) & 0x7F);
+	tm->tm_sec	= bcd2bin(M48T59_READ(M48T59_SEC) & 0x7F);
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -119,17 +119,17 @@ static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the WRITE command */
 	M48T59_SET_BITS(M48T59_CNTL_WRITE, M48T59_CNTL);
 
-	M48T59_WRITE((BIN2BCD(tm->tm_sec) & 0x7F), M48T59_SEC);
-	M48T59_WRITE((BIN2BCD(tm->tm_min) & 0x7F), M48T59_MIN);
-	M48T59_WRITE((BIN2BCD(tm->tm_hour) & 0x3F), M48T59_HOUR);
-	M48T59_WRITE((BIN2BCD(tm->tm_mday) & 0x3F), M48T59_MDAY);
+	M48T59_WRITE((bin2bcd(tm->tm_sec) & 0x7F), M48T59_SEC);
+	M48T59_WRITE((bin2bcd(tm->tm_min) & 0x7F), M48T59_MIN);
+	M48T59_WRITE((bin2bcd(tm->tm_hour) & 0x3F), M48T59_HOUR);
+	M48T59_WRITE((bin2bcd(tm->tm_mday) & 0x3F), M48T59_MDAY);
 	/* tm_mon is 0-11 */
-	M48T59_WRITE((BIN2BCD(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
-	M48T59_WRITE(BIN2BCD(tm->tm_year % 100), M48T59_YEAR);
+	M48T59_WRITE((bin2bcd(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
+	M48T59_WRITE(bin2bcd(tm->tm_year % 100), M48T59_YEAR);
 
 	if (pdata->type == M48T59RTC_TYPE_M48T59 && (tm->tm_year / 100))
 		val = (M48T59_WDAY_CEB | M48T59_WDAY_CB);
-	val |= (BIN2BCD(tm->tm_wday) & 0x07);
+	val |= (bin2bcd(tm->tm_wday) & 0x07);
 	M48T59_WRITE(val, M48T59_WDAY);
 
 	/* Clear the WRITE bit */
@@ -158,18 +158,18 @@ static int m48t59_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((val & M48T59_WDAY_CEB) && (val & M48T59_WDAY_CB))
 		tm->tm_year += 100;	/* one century */
 
-	tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_ALARM_DATE));
-	tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_ALARM_HOUR));
-	tm->tm_min = BCD2BIN(M48T59_READ(M48T59_ALARM_MIN));
-	tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_ALARM_SEC));
+	tm->tm_mday = bcd2bin(M48T59_READ(M48T59_ALARM_DATE));
+	tm->tm_hour = bcd2bin(M48T59_READ(M48T59_ALARM_HOUR));
+	tm->tm_min = bcd2bin(M48T59_READ(M48T59_ALARM_MIN));
+	tm->tm_sec = bcd2bin(M48T59_READ(M48T59_ALARM_SEC));
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -201,18 +201,18 @@ static int m48t59_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	 * 0xff means "always match"
 	 */
 	mday = tm->tm_mday;
-	mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+	mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 	if (mday == 0xff)
 		mday = M48T59_READ(M48T59_MDAY);
 
 	hour = tm->tm_hour;
-	hour = (hour < 24) ? BIN2BCD(hour) : 0x00;
+	hour = (hour < 24) ? bin2bcd(hour) : 0x00;
 
 	min = tm->tm_min;
-	min = (min < 60) ? BIN2BCD(min) : 0x00;
+	min = (min < 60) ? bin2bcd(min) : 0x00;
 
 	sec = tm->tm_sec;
-	sec = (sec < 60) ? BIN2BCD(sec) : 0x00;
+	sec = (sec < 60) ? bin2bcd(sec) : 0x00;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 	/* Issue the WRITE command */
@@ -360,7 +360,6 @@ static struct bin_attribute m48t59_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.read = m48t59_nvram_read,
 	.write = m48t59_nvram_write,
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index 3f7f99a5d96..7c045cffa9f 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -62,14 +62,14 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_wday	= ops->readbyte(M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		tm->tm_sec	= BCD2BIN(ops->readbyte(M48T86_REG_SEC));
-		tm->tm_min	= BCD2BIN(ops->readbyte(M48T86_REG_MIN));
-		tm->tm_hour	= BCD2BIN(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
-		tm->tm_mday	= BCD2BIN(ops->readbyte(M48T86_REG_DOM));
+		tm->tm_sec	= bcd2bin(ops->readbyte(M48T86_REG_SEC));
+		tm->tm_min	= bcd2bin(ops->readbyte(M48T86_REG_MIN));
+		tm->tm_hour	= bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
+		tm->tm_mday	= bcd2bin(ops->readbyte(M48T86_REG_DOM));
 		/* tm_mon is 0-11 */
-		tm->tm_mon	= BCD2BIN(ops->readbyte(M48T86_REG_MONTH)) - 1;
-		tm->tm_year	= BCD2BIN(ops->readbyte(M48T86_REG_YEAR)) + 100;
-		tm->tm_wday	= BCD2BIN(ops->readbyte(M48T86_REG_DOW));
+		tm->tm_mon	= bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
+		tm->tm_year	= bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
+		tm->tm_wday	= bcd2bin(ops->readbyte(M48T86_REG_DOW));
 	}
 
 	/* correct the hour if the clock is in 12h mode */
@@ -103,13 +103,13 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		ops->writebyte(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
-		ops->writebyte(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
-		ops->writebyte(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
-		ops->writebyte(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
-		ops->writebyte(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
-		ops->writebyte(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
-		ops->writebyte(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+		ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
+		ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
+		ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
+		ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
+		ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
+		ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
+		ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
 	}
 
 	/* update ended */
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index 12c9cd25cad..80782798763 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -150,14 +150,14 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 	if (rc < 0)
 		return rc;
 
-	tm->tm_sec = BCD2BIN(regs[MAX6900_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[MAX6900_REG_MN]);
-	tm->tm_hour = BCD2BIN(regs[MAX6900_REG_HR] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[MAX6900_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[MAX6900_REG_MO]) - 1;
-	tm->tm_year = BCD2BIN(regs[MAX6900_REG_YR]) +
-	    BCD2BIN(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
-	tm->tm_wday = BCD2BIN(regs[MAX6900_REG_DW]);
+	tm->tm_sec = bcd2bin(regs[MAX6900_REG_SC]);
+	tm->tm_min = bcd2bin(regs[MAX6900_REG_MN]);
+	tm->tm_hour = bcd2bin(regs[MAX6900_REG_HR] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[MAX6900_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[MAX6900_REG_MO]) - 1;
+	tm->tm_year = bcd2bin(regs[MAX6900_REG_YR]) +
+		      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
+	tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
 	return 0;
 }
@@ -184,14 +184,14 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	if (rc < 0)
 		return rc;
 
-	regs[MAX6900_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[MAX6900_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[MAX6900_REG_HR] = BIN2BCD(tm->tm_hour);
-	regs[MAX6900_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[MAX6900_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[MAX6900_REG_DW] = BIN2BCD(tm->tm_wday);
-	regs[MAX6900_REG_YR] = BIN2BCD(tm->tm_year % 100);
-	regs[MAX6900_REG_CENTURY] = BIN2BCD((tm->tm_year + 1900) / 100);
+	regs[MAX6900_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[MAX6900_REG_MN] = bin2bcd(tm->tm_min);
+	regs[MAX6900_REG_HR] = bin2bcd(tm->tm_hour);
+	regs[MAX6900_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[MAX6900_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[MAX6900_REG_DW] = bin2bcd(tm->tm_wday);
+	regs[MAX6900_REG_YR] = bin2bcd(tm->tm_year % 100);
+	regs[MAX6900_REG_CENTURY] = bin2bcd((tm->tm_year + 1900) / 100);
 	/* set write protect */
 	regs[MAX6900_REG_CT] = MAX6900_REG_CT_WP;
 
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 78b2551fb19..2f6507df7b4 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -124,15 +124,15 @@ static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	/* The chip sends data in this order:
 	 * Seconds, Minutes, Hours, Date, Month, Day, Year */
-	dt->tm_sec	= BCD2BIN(chip->buf[1]);
-	dt->tm_min	= BCD2BIN(chip->buf[2]);
-	dt->tm_hour	= BCD2BIN(chip->buf[3]);
-	dt->tm_mday	= BCD2BIN(chip->buf[4]);
-	dt->tm_mon	= BCD2BIN(chip->buf[5]) - 1;
-	dt->tm_wday	= BCD2BIN(chip->buf[6]);
-	dt->tm_year = BCD2BIN(chip->buf[7]);
+	dt->tm_sec	= bcd2bin(chip->buf[1]);
+	dt->tm_min	= bcd2bin(chip->buf[2]);
+	dt->tm_hour	= bcd2bin(chip->buf[3]);
+	dt->tm_mday	= bcd2bin(chip->buf[4]);
+	dt->tm_mon	= bcd2bin(chip->buf[5]) - 1;
+	dt->tm_wday	= bcd2bin(chip->buf[6]);
+	dt->tm_year = bcd2bin(chip->buf[7]);
 
-	century = BCD2BIN(tmp) * 100;
+	century = bcd2bin(tmp) * 100;
 
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
@@ -168,15 +168,15 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
 	/* Remove write protection */
 	max6902_set_reg(dev, 0xF, 0);
 
-	max6902_set_reg(dev, 0x01, BIN2BCD(dt->tm_sec));
-	max6902_set_reg(dev, 0x03, BIN2BCD(dt->tm_min));
-	max6902_set_reg(dev, 0x05, BIN2BCD(dt->tm_hour));
+	max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec));
+	max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min));
+	max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
 
-	max6902_set_reg(dev, 0x07, BIN2BCD(dt->tm_mday));
-	max6902_set_reg(dev, 0x09, BIN2BCD(dt->tm_mon+1));
-	max6902_set_reg(dev, 0x0B, BIN2BCD(dt->tm_wday));
-	max6902_set_reg(dev, 0x0D, BIN2BCD(dt->tm_year%100));
-	max6902_set_reg(dev, 0x13, BIN2BCD(dt->tm_year/100));
+	max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
+	max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+	max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
+	max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
+	max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
 
 	/* Compulab used a delay here. However, the datasheet
 	 * does not mention a delay being required anywhere... */
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 8876605d4d4..2cbeb0794f1 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -186,30 +186,30 @@ static int tm2bcd(struct rtc_time *tm)
 	if (rtc_valid_tm(tm) != 0)
 		return -EINVAL;
 
-	tm->tm_sec = BIN2BCD(tm->tm_sec);
-	tm->tm_min = BIN2BCD(tm->tm_min);
-	tm->tm_hour = BIN2BCD(tm->tm_hour);
-	tm->tm_mday = BIN2BCD(tm->tm_mday);
+	tm->tm_sec = bin2bcd(tm->tm_sec);
+	tm->tm_min = bin2bcd(tm->tm_min);
+	tm->tm_hour = bin2bcd(tm->tm_hour);
+	tm->tm_mday = bin2bcd(tm->tm_mday);
 
-	tm->tm_mon = BIN2BCD(tm->tm_mon + 1);
+	tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 
 	/* epoch == 1900 */
 	if (tm->tm_year < 100 || tm->tm_year > 199)
 		return -EINVAL;
-	tm->tm_year = BIN2BCD(tm->tm_year - 100);
+	tm->tm_year = bin2bcd(tm->tm_year - 100);
 
 	return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
 {
-	tm->tm_sec = BCD2BIN(tm->tm_sec);
-	tm->tm_min = BCD2BIN(tm->tm_min);
-	tm->tm_hour = BCD2BIN(tm->tm_hour);
-	tm->tm_mday = BCD2BIN(tm->tm_mday);
-	tm->tm_mon = BCD2BIN(tm->tm_mon) - 1;
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
 	/* epoch == 1900 */
-	tm->tm_year = BCD2BIN(tm->tm_year) + 100;
+	tm->tm_year = bcd2bin(tm->tm_year) + 100;
 }
 
 
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
new file mode 100644
index 00000000000..346d633655e
--- /dev/null
+++ b/drivers/rtc/rtc-parisc.c
@@ -0,0 +1,111 @@
+/* rtc-parisc: RTC for HP PA-RISC firmware
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+/* Per-device driver state; as simple as can be, and no simpler. */
+struct parisc_rtc {
+	struct rtc_device *rtc;	/* handle returned by rtc_device_register() */
+	spinlock_t lock;	/* held around get_rtc_time()/set_rtc_time() */
+};
+
+/* Read the platform clock into *tm; -EOPNOTSUPP when RTC_BATT_BAD is set. */
+static int parisc_get_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *rtc_priv = dev_get_drvdata(dev);
+	unsigned long irq_state, status;
+
+	/* hold the driver lock, with local IRQs off, across the clock read */
+	spin_lock_irqsave(&rtc_priv->lock, irq_state);
+	status = get_rtc_time(tm);
+	spin_unlock_irqrestore(&rtc_priv->lock, irq_state);
+
+	/* the battery flag is the only failure this function reports */
+	return (status & RTC_BATT_BAD) ? -EOPNOTSUPP : 0;
+}
+
+/* Write *tm to the platform clock; -EOPNOTSUPP if set_rtc_time() fails. */
+static int parisc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *p = dev_get_drvdata(dev);
+	unsigned long flags;
+	int ret;	/* signed: "ret < 0" could never be true on an unsigned long */
+
+	spin_lock_irqsave(&p->lock, flags);
+	ret = set_rtc_time(tm);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	/* any negative status from set_rtc_time() is reported as unsupported */
+	return ret < 0 ? -EOPNOTSUPP : 0;
+}
+
+static const struct rtc_class_ops parisc_rtc_ops = {
+	.read_time = parisc_get_time,	/* fills *tm via get_rtc_time() */
+	.set_time = parisc_set_time,	/* pushes *tm via set_rtc_time() */
+};
+
+/* Allocate driver state, publish it as drvdata, then register the RTC. */
+static int __devinit parisc_rtc_probe(struct platform_device *dev)
+{
+	struct parisc_rtc *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	spin_lock_init(&p->lock);
+	/* drvdata first: the ops read it and may run once the RTC is registered */
+	platform_set_drvdata(dev, p);
+
+	p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
+					THIS_MODULE);
+	if (IS_ERR(p->rtc)) {
+		int err = PTR_ERR(p->rtc);
+		platform_set_drvdata(dev, NULL);
+		kfree(p);
+		return err;
+	}
+	return 0;
+}
+
+/* Undo probe: unregister the RTC class device and free the driver state. */
+static int __devexit parisc_rtc_remove(struct platform_device *dev)
+{
+	struct parisc_rtc *rtc_priv = platform_get_drvdata(dev);
+
+	rtc_device_unregister(rtc_priv->rtc);
+	kfree(rtc_priv);
+	return 0;
+}
+
+static struct platform_driver parisc_rtc_driver = {
+	.driver = {
+		.name = "rtc-parisc",	/* same string probe registers the RTC under */
+		.owner = THIS_MODULE,
+	},
+	.probe = parisc_rtc_probe,	/* allocates state and registers the RTC */
+	.remove = __devexit_p(parisc_rtc_remove),	/* frees what probe set up */
+};
+
+static int __init parisc_rtc_init(void)
+{
+	return platform_driver_register(&parisc_rtc_driver);	/* on module load */
+}
+
+static void __exit parisc_rtc_fini(void)
+{
+	platform_driver_unregister(&parisc_rtc_driver);	/* on module unload */
+}
+
+module_init(parisc_rtc_init);
+module_exit(parisc_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HP PA-RISC RTC driver");
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index a829f20ad6d..b725913ccbe 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -97,13 +97,13 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		buf[8]);
 
 
-	tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
-	tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
-	tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
-	tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+	tm->tm_sec = bcd2bin(buf[PCF8563_REG_SC] & 0x7F);
+	tm->tm_min = bcd2bin(buf[PCF8563_REG_MN] & 0x7F);
+	tm->tm_hour = bcd2bin(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+	tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
 	tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-	tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR]);
+	tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+	tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;	/* assume we are in 1970...2069 */
 	/* detect the polarity heuristically. see note above. */
@@ -138,17 +138,17 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	/* hours, minutes and seconds */
-	buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
-	buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
-	buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+	buf[PCF8563_REG_SC] = bin2bcd(tm->tm_sec);
+	buf[PCF8563_REG_MN] = bin2bcd(tm->tm_min);
+	buf[PCF8563_REG_HR] = bin2bcd(tm->tm_hour);
 
-	buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+	buf[PCF8563_REG_DM] = bin2bcd(tm->tm_mday);
 
 	/* month, 1 - 12 */
-	buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+	buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
 
 	/* year and century */
-	buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+	buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
 	if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
 		buf[PCF8563_REG_MO] |= PCF8563_MO_C;
 
diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index d388c662bf4..7d33cda3f8f 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -76,11 +76,11 @@ static int pcf8583_get_datetime(struct i2c_client *client, struct rtc_time *dt)
 		buf[4] &= 0x3f;
 		buf[5] &= 0x1f;
 
-		dt->tm_sec = BCD2BIN(buf[1]);
-		dt->tm_min = BCD2BIN(buf[2]);
-		dt->tm_hour = BCD2BIN(buf[3]);
-		dt->tm_mday = BCD2BIN(buf[4]);
-		dt->tm_mon = BCD2BIN(buf[5]) - 1;
+		dt->tm_sec = bcd2bin(buf[1]);
+		dt->tm_min = bcd2bin(buf[2]);
+		dt->tm_hour = bcd2bin(buf[3]);
+		dt->tm_mday = bcd2bin(buf[4]);
+		dt->tm_mon = bcd2bin(buf[5]) - 1;
 	}
 
 	return ret == 2 ? 0 : -EIO;
@@ -94,14 +94,14 @@ static int pcf8583_set_datetime(struct i2c_client *client, struct rtc_time *dt,
 	buf[0] = 0;
 	buf[1] = get_ctrl(client) | 0x80;
 	buf[2] = 0;
-	buf[3] = BIN2BCD(dt->tm_sec);
-	buf[4] = BIN2BCD(dt->tm_min);
-	buf[5] = BIN2BCD(dt->tm_hour);
+	buf[3] = bin2bcd(dt->tm_sec);
+	buf[4] = bin2bcd(dt->tm_min);
+	buf[5] = bin2bcd(dt->tm_hour);
 
 	if (datetoo) {
 		len = 8;
-		buf[6] = BIN2BCD(dt->tm_mday) | (dt->tm_year << 6);
-		buf[7] = BIN2BCD(dt->tm_mon + 1)  | (dt->tm_wday << 5);
+		buf[6] = bin2bcd(dt->tm_mday) | (dt->tm_year << 6);
+		buf[7] = bin2bcd(dt->tm_mon + 1)  | (dt->tm_wday << 5);
 	}
 
 	ret = i2c_master_send(client, (char *)buf, len);
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 395985b339c..42028f233be 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -80,13 +80,13 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	memset(dt, 0, sizeof(*dt));
 
-	dt->tm_sec = BCD2BIN(buf[0]); /* RSECCNT */
-	dt->tm_min = BCD2BIN(buf[1]); /* RMINCNT */
-	dt->tm_hour = BCD2BIN(buf[2]); /* RHRCNT */
+	dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */
+	dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */
+	dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */
 
-	dt->tm_mday = BCD2BIN(buf[3]); /* RDAYCNT */
-	dt->tm_mon = BCD2BIN(buf[4]) - 1; /* RMONCNT */
-	dt->tm_year = BCD2BIN(buf[5]) + 100; /* RYRCNT */
+	dt->tm_mday = bcd2bin(buf[3]); /* RDAYCNT */
+	dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */
+	dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */
 
 	/* the rtc device may contain illegal values on power up
 	 * according to the data sheet. make sure they are valid.
@@ -103,12 +103,12 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
 	if (year >= 2100 || year < 2000)
 		return -EINVAL;
 
-	ret = write_reg(dev, RHRCNT, BIN2BCD(dt->tm_hour));
-	ret = ret ? ret : write_reg(dev, RMINCNT, BIN2BCD(dt->tm_min));
-	ret = ret ? ret : write_reg(dev, RSECCNT, BIN2BCD(dt->tm_sec));
-	ret = ret ? ret : write_reg(dev, RDAYCNT, BIN2BCD(dt->tm_mday));
-	ret = ret ? ret : write_reg(dev, RMONCNT, BIN2BCD(dt->tm_mon + 1));
-	ret = ret ? ret : write_reg(dev, RYRCNT, BIN2BCD(dt->tm_year - 100));
+	ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour));
+	ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min));
+	ret = ret ? ret : write_reg(dev, RSECCNT, bin2bcd(dt->tm_sec));
+	ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday));
+	ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1));
+	ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100));
 	ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday);
 
 	return ret;
diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index 1c14d4497c4..e6ea3f5ee1e 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -235,33 +235,33 @@ static int rs5c313_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	data = rs5c313_read_reg(RS5C313_ADDR_SEC);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_SEC10) << 4);
-	tm->tm_sec = BCD2BIN(data);
+	tm->tm_sec = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MIN);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MIN10) << 4);
-	tm->tm_min = BCD2BIN(data);
+	tm->tm_min = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_HOUR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_HOUR10) << 4);
-	tm->tm_hour = BCD2BIN(data);
+	tm->tm_hour = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_DAY);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_DAY10) << 4);
-	tm->tm_mday = BCD2BIN(data);
+	tm->tm_mday = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MON);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MON10) << 4);
-	tm->tm_mon = BCD2BIN(data) - 1;
+	tm->tm_mon = bcd2bin(data) - 1;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_YEAR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_YEAR10) << 4);
-	tm->tm_year = BCD2BIN(data);
+	tm->tm_year = bcd2bin(data);
 
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_WEEK);
-	tm->tm_wday = BCD2BIN(data);
+	tm->tm_wday = bcd2bin(data);
 
 	RS5C313_CEDISABLE;
 	ndelay(700);		/* CE:L */
@@ -294,31 +294,31 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		}
 	}
 
-	data = BIN2BCD(tm->tm_sec);
+	data = bin2bcd(tm->tm_sec);
 	rs5c313_write_reg(RS5C313_ADDR_SEC, data);
 	rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_min);
+	data = bin2bcd(tm->tm_min);
 	rs5c313_write_reg(RS5C313_ADDR_MIN, data );
 	rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_hour);
+	data = bin2bcd(tm->tm_hour);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR, data);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_mday);
+	data = bin2bcd(tm->tm_mday);
 	rs5c313_write_reg(RS5C313_ADDR_DAY, data);
 	rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4));
 
-	data = BIN2BCD(tm->tm_mon + 1);
+	data = bin2bcd(tm->tm_mon + 1);
 	rs5c313_write_reg(RS5C313_ADDR_MON, data);
 	rs5c313_write_reg(RS5C313_ADDR_MON10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_year % 100);
+	data = bin2bcd(tm->tm_year % 100);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR, data);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_wday);
+	data = bin2bcd(tm->tm_wday);
 	rs5c313_write_reg(RS5C313_ADDR_WEEK, data);
 
 	RS5C313_CEDISABLE;	/* CE:H */
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 839462659af..dd1e2bc7a47 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -74,20 +74,20 @@ rs5c348_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	txbuf[3] = 0;	/* dummy */
 	txbuf[4] = RS5C348_CMD_MW(RS5C348_REG_SECS); /* cmd, sec, ... */
 	txp = &txbuf[5];
-	txp[RS5C348_REG_SECS] = BIN2BCD(tm->tm_sec);
-	txp[RS5C348_REG_MINS] = BIN2BCD(tm->tm_min);
+	txp[RS5C348_REG_SECS] = bin2bcd(tm->tm_sec);
+	txp[RS5C348_REG_MINS] = bin2bcd(tm->tm_min);
 	if (pdata->rtc_24h) {
-		txp[RS5C348_REG_HOURS] = BIN2BCD(tm->tm_hour);
+		txp[RS5C348_REG_HOURS] = bin2bcd(tm->tm_hour);
 	} else {
 		/* hour 0 is AM12, noon is PM12 */
-		txp[RS5C348_REG_HOURS] = BIN2BCD((tm->tm_hour + 11) % 12 + 1) |
+		txp[RS5C348_REG_HOURS] = bin2bcd((tm->tm_hour + 11) % 12 + 1) |
 			(tm->tm_hour >= 12 ? RS5C348_BIT_PM : 0);
 	}
-	txp[RS5C348_REG_WDAY] = BIN2BCD(tm->tm_wday);
-	txp[RS5C348_REG_DAY] = BIN2BCD(tm->tm_mday);
-	txp[RS5C348_REG_MONTH] = BIN2BCD(tm->tm_mon + 1) |
+	txp[RS5C348_REG_WDAY] = bin2bcd(tm->tm_wday);
+	txp[RS5C348_REG_DAY] = bin2bcd(tm->tm_mday);
+	txp[RS5C348_REG_MONTH] = bin2bcd(tm->tm_mon + 1) |
 		(tm->tm_year >= 100 ? RS5C348_BIT_Y2K : 0);
-	txp[RS5C348_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	txp[RS5C348_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 	/* write in one transfer to avoid data inconsistency */
 	ret = spi_write_then_read(spi, txbuf, sizeof(txbuf), NULL, 0);
 	udelay(62);	/* Tcsr 62us */
@@ -116,20 +116,20 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (ret < 0)
 		return ret;
 
-	tm->tm_sec = BCD2BIN(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
-	tm->tm_min = BCD2BIN(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
-	tm->tm_hour = BCD2BIN(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
+	tm->tm_sec = bcd2bin(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
+	tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
+	tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
 	if (!pdata->rtc_24h) {
 		tm->tm_hour %= 12;
 		if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
 			tm->tm_hour += 12;
 	}
-	tm->tm_wday = BCD2BIN(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
-	tm->tm_mday = BCD2BIN(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
+	tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
+	tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
 	tm->tm_mon =
-		BCD2BIN(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
+		bcd2bin(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rxbuf[RS5C348_REG_YEAR]) +
+	tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
 		((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
 	if (rtc_valid_tm(tm) < 0) {
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 8b561958fb1..2f2c68d476d 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -148,9 +148,9 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 	unsigned	hour;
 
 	if (rs5c->time24)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x1f);
+	hour = bcd2bin(reg & 0x1f);
 	if (hour == 12)
 		hour = 0;
 	if (reg & 0x20)
@@ -161,15 +161,15 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 {
 	if (rs5c->time24)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour > 12)
-		return 0x20 | BIN2BCD(hour - 12);
+		return 0x20 | bin2bcd(hour - 12);
 	if (hour == 12)
-		return 0x20 | BIN2BCD(12);
+		return 0x20 | bin2bcd(12);
 	if (hour == 0)
-		return BIN2BCD(12);
-	return BIN2BCD(hour);
+		return bin2bcd(12);
+	return bin2bcd(hour);
 }
 
 static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
@@ -180,18 +180,18 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	if (status < 0)
 		return status;
 
-	tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
-	tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+	tm->tm_sec = bcd2bin(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+	tm->tm_min = bcd2bin(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
 	tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
 
-	tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
-	tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
+	tm->tm_wday = bcd2bin(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+	tm->tm_mday = bcd2bin(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
 
 	/* tm->tm_mon is zero-based */
-	tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
 
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(rs5c->regs[RS5C372_REG_YEAR]) + 100;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
 		"mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -216,13 +216,13 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	addr   = RS5C_ADDR(RS5C372_REG_SECS);
-	buf[0] = BIN2BCD(tm->tm_sec);
-	buf[1] = BIN2BCD(tm->tm_min);
+	buf[0] = bin2bcd(tm->tm_sec);
+	buf[1] = bin2bcd(tm->tm_min);
 	buf[2] = rs5c_hr2reg(rs5c, tm->tm_hour);
-	buf[3] = BIN2BCD(tm->tm_wday);
-	buf[4] = BIN2BCD(tm->tm_mday);
-	buf[5] = BIN2BCD(tm->tm_mon + 1);
-	buf[6] = BIN2BCD(tm->tm_year - 100);
+	buf[3] = bin2bcd(tm->tm_wday);
+	buf[4] = bin2bcd(tm->tm_mday);
+	buf[5] = bin2bcd(tm->tm_mon + 1);
+	buf[6] = bin2bcd(tm->tm_year - 100);
 
 	if (i2c_smbus_write_i2c_block_data(client, addr, sizeof(buf), buf) < 0) {
 		dev_err(&client->dev, "%s: write error\n", __func__);
@@ -367,7 +367,7 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	/* report alarm time */
 	t->time.tm_sec = 0;
-	t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+	t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
 	t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
@@ -413,7 +413,7 @@ static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	}
 
 	/* set alarm */
-	buf[0] = BIN2BCD(t->time.tm_min);
+	buf[0] = bin2bcd(t->time.tm_min);
 	buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
 	buf[2] = 0x7f;	/* any/all days */
 
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index a6fa1f2f2ca..def4d396d0b 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -104,12 +104,12 @@ static int s35390a_disable_test_mode(struct s35390a *s35390a)
 static char s35390a_hr2reg(struct s35390a *s35390a, int hour)
 {
 	if (s35390a->twentyfourhour)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour < 12)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
-	return 0x40 | BIN2BCD(hour - 12);
+	return 0x40 | bin2bcd(hour - 12);
 }
 
 static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
@@ -117,9 +117,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
 	unsigned hour;
 
 	if (s35390a->twentyfourhour)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x3f);
+	hour = bcd2bin(reg & 0x3f);
 	if (reg & 0x40)
 		hour += 12;
 
@@ -137,13 +137,13 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_wday);
 
-	buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100);
-	buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1);
-	buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday);
-	buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday);
+	buf[S35390A_BYTE_YEAR] = bin2bcd(tm->tm_year - 100);
+	buf[S35390A_BYTE_MONTH] = bin2bcd(tm->tm_mon + 1);
+	buf[S35390A_BYTE_DAY] = bin2bcd(tm->tm_mday);
+	buf[S35390A_BYTE_WDAY] = bin2bcd(tm->tm_wday);
 	buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour);
-	buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min);
-	buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec);
+	buf[S35390A_BYTE_MINS] = bin2bcd(tm->tm_min);
+	buf[S35390A_BYTE_SECS] = bin2bcd(tm->tm_sec);
 
 	/* This chip expects the bits of each byte to be in reverse order */
 	for (i = 0; i < 7; ++i)
@@ -168,13 +168,13 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	for (i = 0; i < 7; ++i)
 		buf[i] = bitrev8(buf[i]);
 
-	tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]);
-	tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]);
+	tm->tm_sec = bcd2bin(buf[S35390A_BYTE_SECS]);
+	tm->tm_min = bcd2bin(buf[S35390A_BYTE_MINS]);
 	tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]);
-	tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]);
-	tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]);
-	tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1;
-	tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100;
+	tm->tm_wday = bcd2bin(buf[S35390A_BYTE_WDAY]);
+	tm->tm_mday = bcd2bin(buf[S35390A_BYTE_DAY]);
+	tm->tm_mon = bcd2bin(buf[S35390A_BYTE_MONTH]) - 1;
+	tm->tm_year = bcd2bin(buf[S35390A_BYTE_YEAR]) + 100;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, "
 		"mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec,
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index e7d19b6c265..910bc704939 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -134,12 +134,12 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 		 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
 		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
-	BCD_TO_BIN(rtc_tm->tm_sec);
-	BCD_TO_BIN(rtc_tm->tm_min);
-	BCD_TO_BIN(rtc_tm->tm_hour);
-	BCD_TO_BIN(rtc_tm->tm_mday);
-	BCD_TO_BIN(rtc_tm->tm_mon);
-	BCD_TO_BIN(rtc_tm->tm_year);
+	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	rtc_tm->tm_year += 100;
 	rtc_tm->tm_mon -= 1;
@@ -163,12 +163,12 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 		return -EINVAL;
 	}
 
-	writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
-	writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
-	writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
-	writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
-	writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
-	writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
+	writeb(bin2bcd(tm->tm_sec),  base + S3C2410_RTCSEC);
+	writeb(bin2bcd(tm->tm_min),  base + S3C2410_RTCMIN);
+	writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
+	writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE);
+	writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON);
+	writeb(bin2bcd(year), base + S3C2410_RTCYEAR);
 
 	return 0;
 }
@@ -199,34 +199,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	/* decode the alarm enable field */
 
 	if (alm_en & S3C2410_RTCALM_SECEN)
-		BCD_TO_BIN(alm_tm->tm_sec);
+		alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
 	else
 		alm_tm->tm_sec = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_MINEN)
-		BCD_TO_BIN(alm_tm->tm_min);
+		alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
 	else
 		alm_tm->tm_min = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_HOUREN)
-		BCD_TO_BIN(alm_tm->tm_hour);
+		alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
 	else
 		alm_tm->tm_hour = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_DAYEN)
-		BCD_TO_BIN(alm_tm->tm_mday);
+		alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
 	else
 		alm_tm->tm_mday = 0xff;
 
 	if (alm_en & S3C2410_RTCALM_MONEN) {
-		BCD_TO_BIN(alm_tm->tm_mon);
+		alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
 		alm_tm->tm_mon -= 1;
 	} else {
 		alm_tm->tm_mon = 0xff;
 	}
 
 	if (alm_en & S3C2410_RTCALM_YEAREN)
-		BCD_TO_BIN(alm_tm->tm_year);
+		alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
 	else
 		alm_tm->tm_year = 0xffff;
 
@@ -250,17 +250,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
 		alrm_en |= S3C2410_RTCALM_SECEN;
-		writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
+		writeb(bin2bcd(tm->tm_sec), base + S3C2410_ALMSEC);
 	}
 
 	if (tm->tm_min < 60 && tm->tm_min >= 0) {
 		alrm_en |= S3C2410_RTCALM_MINEN;
-		writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
+		writeb(bin2bcd(tm->tm_min), base + S3C2410_ALMMIN);
 	}
 
 	if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
 		alrm_en |= S3C2410_RTCALM_HOUREN;
-		writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
+		writeb(bin2bcd(tm->tm_hour), base + S3C2410_ALMHOUR);
 	}
 
 	pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index fcead4c4cd1..aaf9d6a337c 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -324,23 +324,23 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 		sec128 = readb(rtc->regbase + R64CNT);
 
-		tm->tm_sec	= BCD2BIN(readb(rtc->regbase + RSECCNT));
-		tm->tm_min	= BCD2BIN(readb(rtc->regbase + RMINCNT));
-		tm->tm_hour	= BCD2BIN(readb(rtc->regbase + RHRCNT));
-		tm->tm_wday	= BCD2BIN(readb(rtc->regbase + RWKCNT));
-		tm->tm_mday	= BCD2BIN(readb(rtc->regbase + RDAYCNT));
-		tm->tm_mon	= BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1;
+		tm->tm_sec	= bcd2bin(readb(rtc->regbase + RSECCNT));
+		tm->tm_min	= bcd2bin(readb(rtc->regbase + RMINCNT));
+		tm->tm_hour	= bcd2bin(readb(rtc->regbase + RHRCNT));
+		tm->tm_wday	= bcd2bin(readb(rtc->regbase + RWKCNT));
+		tm->tm_mday	= bcd2bin(readb(rtc->regbase + RDAYCNT));
+		tm->tm_mon	= bcd2bin(readb(rtc->regbase + RMONCNT)) - 1;
 
 		if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
 			yr  = readw(rtc->regbase + RYRCNT);
-			yr100 = BCD2BIN(yr >> 8);
+			yr100 = bcd2bin(yr >> 8);
 			yr &= 0xff;
 		} else {
 			yr  = readb(rtc->regbase + RYRCNT);
-			yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20);
+			yr100 = bcd2bin((yr == 0x99) ? 0x19 : 0x20);
 		}
 
-		tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900;
+		tm->tm_year = (yr100 * 100 + bcd2bin(yr)) - 1900;
 
 		sec2 = readb(rtc->regbase + R64CNT);
 		cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF;
@@ -382,20 +382,20 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	tmp &= ~RCR2_START;
 	writeb(tmp, rtc->regbase + RCR2);
 
-	writeb(BIN2BCD(tm->tm_sec),  rtc->regbase + RSECCNT);
-	writeb(BIN2BCD(tm->tm_min),  rtc->regbase + RMINCNT);
-	writeb(BIN2BCD(tm->tm_hour), rtc->regbase + RHRCNT);
-	writeb(BIN2BCD(tm->tm_wday), rtc->regbase + RWKCNT);
-	writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT);
-	writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT);
+	writeb(bin2bcd(tm->tm_sec),  rtc->regbase + RSECCNT);
+	writeb(bin2bcd(tm->tm_min),  rtc->regbase + RMINCNT);
+	writeb(bin2bcd(tm->tm_hour), rtc->regbase + RHRCNT);
+	writeb(bin2bcd(tm->tm_wday), rtc->regbase + RWKCNT);
+	writeb(bin2bcd(tm->tm_mday), rtc->regbase + RDAYCNT);
+	writeb(bin2bcd(tm->tm_mon + 1), rtc->regbase + RMONCNT);
 
 	if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
-		year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) |
-			BIN2BCD(tm->tm_year % 100);
+		year = (bin2bcd((tm->tm_year + 1900) / 100) << 8) |
+			bin2bcd(tm->tm_year % 100);
 		writew(year, rtc->regbase + RYRCNT);
 	} else {
 		year = tm->tm_year % 100;
-		writeb(BIN2BCD(year), rtc->regbase + RYRCNT);
+		writeb(bin2bcd(year), rtc->regbase + RYRCNT);
 	}
 
 	/* Start RTC */
@@ -417,7 +417,7 @@ static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
 	byte = readb(rtc->regbase + reg_off);
 	if (byte & AR_ENB) {
 		byte &= ~AR_ENB;	/* strip the enable bit */
-		value = BCD2BIN(byte);
+		value = bcd2bin(byte);
 	}
 
 	return value;
@@ -455,7 +455,7 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc,
 	if (value < 0)
 		writeb(0, rtc->regbase + reg_off);
 	else
-		writeb(BIN2BCD(value) | AR_ENB,  rtc->regbase + reg_off);
+		writeb(bin2bcd(value) | AR_ENB,  rtc->regbase + reg_off);
 }
 
 static int sh_rtc_check_alarm(struct rtc_time *tm)
@@ -568,7 +568,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
 	struct sh_rtc *rtc;
 	struct resource *res;
 	unsigned int tmp;
-	int ret = -ENOENT;
+	int ret;
 
 	rtc = kzalloc(sizeof(struct sh_rtc), GFP_KERNEL);
 	if (unlikely(!rtc))
@@ -577,26 +577,33 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
 	spin_lock_init(&rtc->lock);
 
 	/* get periodic/carry/alarm irqs */
-	rtc->periodic_irq = platform_get_irq(pdev, 0);
-	if (unlikely(rtc->periodic_irq < 0)) {
+	ret = platform_get_irq(pdev, 0);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for period\n");
 		goto err_badres;
 	}
+	rtc->periodic_irq = ret;
 
-	rtc->carry_irq = platform_get_irq(pdev, 1);
-	if (unlikely(rtc->carry_irq < 0)) {
+	ret = platform_get_irq(pdev, 1);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for carry\n");
 		goto err_badres;
 	}
+	rtc->carry_irq = ret;
 
-	rtc->alarm_irq = platform_get_irq(pdev, 2);
-	if (unlikely(rtc->alarm_irq < 0)) {
+	ret = platform_get_irq(pdev, 2);
+	if (unlikely(ret < 0)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for alarm\n");
 		goto err_badres;
 	}
+	rtc->alarm_irq = ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (unlikely(res == NULL)) {
+		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IO resource\n");
 		goto err_badres;
 	}
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 9a7e920315f..f4cd46e15af 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -82,14 +82,14 @@ static int stk17ta8_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	flags = readb(pdata->ioaddr + RTC_FLAGS);
 	writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
 
-	writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-	writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-	writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-	writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-	writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-	writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-	writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
-	writeb(BIN2BCD((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
+	writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+	writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+	writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+	writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+	writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+	writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+	writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+	writeb(bin2bcd((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
 
 	writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
 	return 0;
@@ -120,14 +120,14 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY);
 	writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
@@ -148,16 +148,16 @@ static void stk17ta8_rtc_update_alarm(struct rtc_plat_data *pdata)
 	writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS);
 
 	writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_mday),
+	       0x80 : bin2bcd(pdata->alrm_mday),
 	       ioaddr + RTC_DATE_ALARM);
 	writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_hour),
+	       0x80 : bin2bcd(pdata->alrm_hour),
 	       ioaddr + RTC_HOURS_ALARM);
 	writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_min),
+	       0x80 : bin2bcd(pdata->alrm_min),
 	       ioaddr + RTC_MINUTES_ALARM);
 	writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-	       0x80 : BIN2BCD(pdata->alrm_sec),
+	       0x80 : bin2bcd(pdata->alrm_sec),
 	       ioaddr + RTC_SECONDS_ALARM);
 	writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS);
 	readb(ioaddr + RTC_FLAGS);	/* clear interrupts */
@@ -280,7 +280,6 @@ static struct bin_attribute stk17ta8_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = RTC_OFFSET,
 	.read = stk17ta8_nvram_read,
diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c
new file mode 100644
index 00000000000..abe87a4d266
--- /dev/null
+++ b/drivers/rtc/rtc-twl4030.c
@@ -0,0 +1,564 @@
+/*
+ * rtc-twl4030.c -- TWL4030 Real Time Clock interface
+ *
+ * Copyright (C) 2007 MontaVista Software, Inc
+ * Author: Alexandre Rusev <source@mvista.com>
+ *
+ * Based on original TI driver twl4030-rtc.c
+ *   Copyright (C) 2006 Texas Instruments, Inc.
+ *
+ * Based on rtc-omap.c
+ *   Copyright (C) 2003 MontaVista Software, Inc.
+ *   Author: George G. Davis <gdavis@mvista.com> or <source@mvista.com>
+ *   Copyright (C) 2006 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * RTC block register offsets (use TWL_MODULE_RTC)
+ */
+#define REG_SECONDS_REG                          0x00
+#define REG_MINUTES_REG                          0x01
+#define REG_HOURS_REG                            0x02
+#define REG_DAYS_REG                             0x03
+#define REG_MONTHS_REG                           0x04
+#define REG_YEARS_REG                            0x05
+#define REG_WEEKS_REG                            0x06
+
+#define REG_ALARM_SECONDS_REG                    0x07
+#define REG_ALARM_MINUTES_REG                    0x08
+#define REG_ALARM_HOURS_REG                      0x09
+#define REG_ALARM_DAYS_REG                       0x0A
+#define REG_ALARM_MONTHS_REG                     0x0B
+#define REG_ALARM_YEARS_REG                      0x0C
+
+#define REG_RTC_CTRL_REG                         0x0D
+#define REG_RTC_STATUS_REG                       0x0E
+#define REG_RTC_INTERRUPTS_REG                   0x0F
+
+#define REG_RTC_COMP_LSB_REG                     0x10
+#define REG_RTC_COMP_MSB_REG                     0x11
+
+/* RTC_CTRL_REG bitfields */
+#define BIT_RTC_CTRL_REG_STOP_RTC_M              0x01
+#define BIT_RTC_CTRL_REG_ROUND_30S_M             0x02
+#define BIT_RTC_CTRL_REG_AUTO_COMP_M             0x04
+#define BIT_RTC_CTRL_REG_MODE_12_24_M            0x08
+#define BIT_RTC_CTRL_REG_TEST_MODE_M             0x10
+#define BIT_RTC_CTRL_REG_SET_32_COUNTER_M        0x20
+#define BIT_RTC_CTRL_REG_GET_TIME_M              0x40
+
+/* RTC_STATUS_REG bitfields */
+#define BIT_RTC_STATUS_REG_RUN_M                 0x02
+#define BIT_RTC_STATUS_REG_1S_EVENT_M            0x04
+#define BIT_RTC_STATUS_REG_1M_EVENT_M            0x08
+#define BIT_RTC_STATUS_REG_1H_EVENT_M            0x10
+#define BIT_RTC_STATUS_REG_1D_EVENT_M            0x20
+#define BIT_RTC_STATUS_REG_ALARM_M               0x40
+#define BIT_RTC_STATUS_REG_POWER_UP_M            0x80
+
+/* RTC_INTERRUPTS_REG bitfields */
+#define BIT_RTC_INTERRUPTS_REG_EVERY_M           0x03
+#define BIT_RTC_INTERRUPTS_REG_IT_TIMER_M        0x04
+#define BIT_RTC_INTERRUPTS_REG_IT_ALARM_M        0x08
+
+
+/* REG_SECONDS_REG through REG_YEARS_REG is how many registers? */
+#define ALL_TIME_REGS		6
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Read one byte from RTC register 'reg' into *data; a negative result is logged.
+ */
+static int twl4030_rtc_read_u8(u8 *data, u8 reg)
+{
+	int ret;
+
+	ret = twl4030_i2c_read_u8(TWL4030_MODULE_RTC, data, reg);
+	if (ret < 0)
+		pr_err("twl4030_rtc: Could not read TWL4030 "
+		       "register %X - error %d\n", reg, ret);
+	return ret;
+}
+
+/*
+ * Write one byte 'data' to RTC register 'reg'; a negative result is logged.
+ */
+static int twl4030_rtc_write_u8(u8 data, u8 reg)
+{
+	int ret;
+
+	ret = twl4030_i2c_write_u8(TWL4030_MODULE_RTC, data, reg);
+	if (ret < 0)
+		pr_err("twl4030_rtc: Could not write TWL4030 "
+		       "register %X - error %d\n", reg, ret);
+	return ret;
+}
+
+/*
+ * Cache the value for timer/alarm interrupts register; this is
+ * only changed by callers holding rtc ops lock (or resume).
+ */
+static unsigned char rtc_irq_bits;
+
+/*
+ * Enable timer and/or alarm interrupts.
+ */
+static int set_rtc_irq_bit(unsigned char bit)
+{
+	unsigned char val;
+	int ret;
+
+	val = rtc_irq_bits | bit;
+	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
+	if (ret == 0)
+		rtc_irq_bits = val;
+
+	return ret;
+}
+
+/*
+ * Disable timer and/or alarm interrupts.
+ */
+static int mask_rtc_irq_bit(unsigned char bit)
+{
+	unsigned char val;
+	int ret;
+
+	val = rtc_irq_bits & ~bit;
+	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
+	if (ret == 0)
+		rtc_irq_bits = val;
+
+	return ret;
+}
+
+static inline int twl4030_rtc_alarm_irq_set_state(int enabled)
+{
+	int ret;
+
+	if (enabled)
+		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+	else
+		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+
+	return ret;
+}
+
+static inline int twl4030_rtc_irq_set_state(int enabled)
+{
+	int ret;
+
+	if (enabled)
+		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+	else
+		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+
+	return ret;
+}
+
+/*
+ * Gets current TWL4030 RTC time and date parameters.
+ *
+ * The RTC's time/alarm representation is not what gmtime(3) requires
+ * Linux to use:
+ *
+ *  - Months are 1..12 vs Linux 0-11
+ *  - Years are 0..99 vs Linux 1900..N (we assume 21st century)
+ */
+static int twl4030_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+	u8 save_control;
+
+	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		return ret;
+
+	save_control |= BIT_RTC_CTRL_REG_GET_TIME_M;
+
+	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		return ret;
+
+	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+			       REG_SECONDS_REG, ALL_TIME_REGS);
+
+	if (ret < 0) {
+		dev_err(dev, "rtc_read_time error %d\n", ret);
+		return ret;
+	}
+
+	tm->tm_sec = bcd2bin(rtc_data[0]);
+	tm->tm_min = bcd2bin(rtc_data[1]);
+	tm->tm_hour = bcd2bin(rtc_data[2]);
+	tm->tm_mday = bcd2bin(rtc_data[3]);
+	tm->tm_mon = bcd2bin(rtc_data[4]) - 1;
+	tm->tm_year = bcd2bin(rtc_data[5]) + 100;
+
+	return ret;
+}
+
+static int twl4030_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char save_control;
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	rtc_data[1] = bin2bcd(tm->tm_sec);
+	rtc_data[2] = bin2bcd(tm->tm_min);
+	rtc_data[3] = bin2bcd(tm->tm_hour);
+	rtc_data[4] = bin2bcd(tm->tm_mday);
+	rtc_data[5] = bin2bcd(tm->tm_mon + 1);
+	rtc_data[6] = bin2bcd(tm->tm_year - 100);
+
+	/* Stop RTC while updating the TC registers */
+	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out;
+
+	save_control &= ~BIT_RTC_CTRL_REG_STOP_RTC_M;
+	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out;
+
+	/* update all the time registers in one shot */
+	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, rtc_data,
+			REG_SECONDS_REG, ALL_TIME_REGS);
+	if (ret < 0) {
+		dev_err(dev, "rtc_set_time error %d\n", ret);
+		goto out;
+	}
+
+	/* Start back RTC */
+	save_control |= BIT_RTC_CTRL_REG_STOP_RTC_M;
+	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+
+out:
+	return ret;
+}
+
+/*
+ * Gets current TWL4030 RTC alarm time.
+ */
+static int twl4030_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+			       REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
+	if (ret < 0) {
+		dev_err(dev, "rtc_read_alarm error %d\n", ret);
+		return ret;
+	}
+
+	/* some of these fields may be wildcard/"match all" */
+	alm->time.tm_sec = bcd2bin(rtc_data[0]);
+	alm->time.tm_min = bcd2bin(rtc_data[1]);
+	alm->time.tm_hour = bcd2bin(rtc_data[2]);
+	alm->time.tm_mday = bcd2bin(rtc_data[3]);
+	alm->time.tm_mon = bcd2bin(rtc_data[4]) - 1;
+	alm->time.tm_year = bcd2bin(rtc_data[5]) + 100;
+
+	/* report cached alarm enable state */
+	if (rtc_irq_bits & BIT_RTC_INTERRUPTS_REG_IT_ALARM_M)
+		alm->enabled = 1;
+
+	return ret;
+}
+
+static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	unsigned char alarm_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	ret = twl4030_rtc_alarm_irq_set_state(0);
+	if (ret)
+		goto out;
+
+	alarm_data[1] = bin2bcd(alm->time.tm_sec);
+	alarm_data[2] = bin2bcd(alm->time.tm_min);
+	alarm_data[3] = bin2bcd(alm->time.tm_hour);
+	alarm_data[4] = bin2bcd(alm->time.tm_mday);
+	alarm_data[5] = bin2bcd(alm->time.tm_mon + 1);
+	alarm_data[6] = bin2bcd(alm->time.tm_year - 100);
+
+	/* update all the alarm registers in one shot */
+	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, alarm_data,
+			REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
+	if (ret) {
+		dev_err(dev, "rtc_set_alarm error %d\n", ret);
+		goto out;
+	}
+
+	if (alm->enabled)
+		ret = twl4030_rtc_alarm_irq_set_state(1);
+out:
+	return ret;
+}
+
+#ifdef	CONFIG_RTC_INTF_DEV
+
+static int twl4030_rtc_ioctl(struct device *dev, unsigned int cmd,
+			     unsigned long arg)
+{
+	switch (cmd) {
+	case RTC_AIE_OFF:
+		return twl4030_rtc_alarm_irq_set_state(0);
+	case RTC_AIE_ON:
+		return twl4030_rtc_alarm_irq_set_state(1);
+	case RTC_UIE_OFF:
+		return twl4030_rtc_irq_set_state(0);
+	case RTC_UIE_ON:
+		return twl4030_rtc_irq_set_state(1);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+#else
+#define	twl4030_rtc_ioctl	NULL
+#endif
+
+static irqreturn_t twl4030_rtc_interrupt(int irq, void *rtc)
+{
+	unsigned long events = 0;
+	int ret = IRQ_NONE;
+	int res;
+	u8 rd_reg;
+
+#ifdef CONFIG_LOCKDEP
+	/* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which
+	 * we don't want and can't tolerate.  Although it might be
+	 * friendlier not to borrow this thread context...
+	 */
+	local_irq_enable();
+#endif
+
+	res = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
+	if (res)
+		goto out;
+	/*
+	 * Figure out source of interrupt: ALARM or TIMER in RTC_STATUS_REG.
+	 * only one (ALARM or RTC) interrupt source may be enabled
+	 * at time, we also could check our results
+	 * by reading RTS_INTERRUPTS_REGISTER[IT_TIMER,IT_ALARM]
+	 */
+	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
+		events |= RTC_IRQF | RTC_AF;
+	else
+		events |= RTC_IRQF | RTC_UF;
+
+	res = twl4030_rtc_write_u8(rd_reg | BIT_RTC_STATUS_REG_ALARM_M,
+				   REG_RTC_STATUS_REG);
+	if (res)
+		goto out;
+
+	/* Clear on Read enabled. RTC_IT bit of TWL4030_INT_PWR_ISR1
+	 * needs 2 reads to clear the interrupt. One read is done in
+	 * do_twl4030_pwrirq(). Doing the second read, to clear
+	 * the bit.
+	 *
+	 * FIXME the reason PWR_ISR1 needs an extra read is that
+	 * RTC_IF retriggered until we cleared REG_ALARM_M above.
+	 * But re-reading like this is a bad hack; by doing so we
+	 * risk wrongly clearing status for some other IRQ (losing
+	 * the interrupt).  Be smarter about handling RTC_UF ...
+	 */
+	res = twl4030_i2c_read_u8(TWL4030_MODULE_INT,
+			&rd_reg, TWL4030_INT_PWR_ISR1);
+	if (res)
+		goto out;
+
+	/* Notify RTC core on event */
+	rtc_update_irq(rtc, 1, events);
+
+	ret = IRQ_HANDLED;
+out:
+	return ret;
+}
+
+static struct rtc_class_ops twl4030_rtc_ops = {
+	.ioctl		= twl4030_rtc_ioctl,
+	.read_time	= twl4030_rtc_read_time,
+	.set_time	= twl4030_rtc_set_time,
+	.read_alarm	= twl4030_rtc_read_alarm,
+	.set_alarm	= twl4030_rtc_set_alarm,
+};
+
+/*----------------------------------------------------------------------*/
+
+static int __devinit twl4030_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtc;
+	int ret = 0;
+	int irq = platform_get_irq(pdev, 0);
+	u8 rd_reg;
+
+	if (irq < 0)
+		return irq;
+
+	rtc = rtc_device_register(pdev->name,
+				  &pdev->dev, &twl4030_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		ret = PTR_ERR(rtc);
+		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
+			PTR_ERR(rtc));
+		goto out0;
+
+	}
+
+	platform_set_drvdata(pdev, rtc);
+
+	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
+
+	if (ret < 0)
+		goto out1;
+
+	if (rd_reg & BIT_RTC_STATUS_REG_POWER_UP_M)
+		dev_warn(&pdev->dev, "Power up reset detected.\n");
+
+	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
+		dev_warn(&pdev->dev, "Pending Alarm interrupt detected.\n");
+
+	/* Clear RTC Power up reset and pending alarm interrupts */
+	ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_STATUS_REG);
+	if (ret < 0)
+		goto out1;
+
+	ret = request_irq(irq, twl4030_rtc_interrupt,
+				IRQF_TRIGGER_RISING,
+				rtc->dev.bus_id, rtc);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "IRQ is not free.\n");
+		goto out1;
+	}
+
+	/* Check RTC module status, Enable if it is off */
+	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out2;
+
+	if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) {
+		dev_info(&pdev->dev, "Enabling TWL4030-RTC.\n");
+		rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M;
+		ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG);
+		if (ret < 0)
+			goto out2;
+	}
+
+	/* init cached IRQ enable bits */
+	ret = twl4030_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
+	if (ret < 0)
+		goto out2;
+
+	return ret;
+
+
+out2:
+	free_irq(irq, rtc);
+out1:
+	rtc_device_unregister(rtc);
+out0:
+	return ret;
+}
+
+/*
+ * Disable all TWL4030 RTC module interrupts.
+ * Sets status flag to free.
+ */
+static int __devexit twl4030_rtc_remove(struct platform_device *pdev)
+{
+	/* leave rtc running, but disable irqs */
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+
+	free_irq(irq, rtc);
+
+	rtc_device_unregister(rtc);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static void twl4030_rtc_shutdown(struct platform_device *pdev)
+{
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M |
+			 BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+}
+
+#ifdef CONFIG_PM
+
+static unsigned char irqstat;
+
+static int twl4030_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	irqstat = rtc_irq_bits;
+
+	/* REVISIT alarm may need to wake us from sleep */
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M |
+			 BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+	return 0;
+}
+
+static int twl4030_rtc_resume(struct platform_device *pdev)
+{
+	set_rtc_irq_bit(irqstat);
+	return 0;
+}
+
+#else
+#define twl4030_rtc_suspend NULL
+#define twl4030_rtc_resume  NULL
+#endif
+
+MODULE_ALIAS("platform:twl4030_rtc");
+
+static struct platform_driver twl4030rtc_driver = {
+	.probe		= twl4030_rtc_probe,
+	.remove		= __devexit_p(twl4030_rtc_remove),
+	.shutdown	= twl4030_rtc_shutdown,
+	.suspend	= twl4030_rtc_suspend,
+	.resume		= twl4030_rtc_resume,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "twl4030_rtc",
+	},
+};
+
+static int __init twl4030_rtc_init(void)
+{
+	return platform_driver_register(&twl4030rtc_driver);
+}
+module_init(twl4030_rtc_init);
+
+static void __exit twl4030_rtc_exit(void)
+{
+	platform_driver_unregister(&twl4030rtc_driver);
+}
+module_exit(twl4030_rtc_exit);
+
+MODULE_AUTHOR("Texas Instruments, MontaVista Software");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index 10025d84026..14d4f036a76 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -92,19 +92,19 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt)
 
 	/* ...and then read constant values. */
 	tmp = v3020_get_reg(chip, V3020_SECONDS);
-	dt->tm_sec	= BCD2BIN(tmp);
+	dt->tm_sec	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MINUTES);
-	dt->tm_min	= BCD2BIN(tmp);
+	dt->tm_min	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_HOURS);
-	dt->tm_hour	= BCD2BIN(tmp);
+	dt->tm_hour	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MONTH_DAY);
-	dt->tm_mday	= BCD2BIN(tmp);
+	dt->tm_mday	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_MONTH);
-	dt->tm_mon    = BCD2BIN(tmp) - 1;
+	dt->tm_mon    = bcd2bin(tmp) - 1;
 	tmp = v3020_get_reg(chip, V3020_WEEK_DAY);
-	dt->tm_wday	= BCD2BIN(tmp);
+	dt->tm_wday	= bcd2bin(tmp);
 	tmp = v3020_get_reg(chip, V3020_YEAR);
-	dt->tm_year = BCD2BIN(tmp)+100;
+	dt->tm_year = bcd2bin(tmp)+100;
 
 #ifdef DEBUG
 	printk("\n%s : Read RTC values\n",__func__);
@@ -136,13 +136,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt)
 #endif
 
 	/* Write all the values to ram... */
-	v3020_set_reg(chip, V3020_SECONDS, 	BIN2BCD(dt->tm_sec));
-	v3020_set_reg(chip, V3020_MINUTES, 	BIN2BCD(dt->tm_min));
-	v3020_set_reg(chip, V3020_HOURS, 	BIN2BCD(dt->tm_hour));
-	v3020_set_reg(chip, V3020_MONTH_DAY,	BIN2BCD(dt->tm_mday));
-	v3020_set_reg(chip, V3020_MONTH,     BIN2BCD(dt->tm_mon + 1));
-	v3020_set_reg(chip, V3020_WEEK_DAY, 	BIN2BCD(dt->tm_wday));
-	v3020_set_reg(chip, V3020_YEAR, 	BIN2BCD(dt->tm_year % 100));
+	v3020_set_reg(chip, V3020_SECONDS, 	bin2bcd(dt->tm_sec));
+	v3020_set_reg(chip, V3020_MINUTES, 	bin2bcd(dt->tm_min));
+	v3020_set_reg(chip, V3020_HOURS, 	bin2bcd(dt->tm_hour));
+	v3020_set_reg(chip, V3020_MONTH_DAY,	bin2bcd(dt->tm_mday));
+	v3020_set_reg(chip, V3020_MONTH,     bin2bcd(dt->tm_mon + 1));
+	v3020_set_reg(chip, V3020_WEEK_DAY, 	bin2bcd(dt->tm_wday));
+	v3020_set_reg(chip, V3020_YEAR, 	bin2bcd(dt->tm_year % 100));
 
 	/* ...and set the clock. */
 	v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0);
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 884b635f028..834dcc6d785 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -360,7 +360,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 	spin_unlock_irq(&rtc_lock);
 
 	aie_irq = platform_get_irq(pdev, 0);
-	if (aie_irq < 0 || aie_irq >= NR_IRQS) {
+	if (aie_irq < 0 || aie_irq >= nr_irqs) {
 		retval = -EBUSY;
 		goto err_device_unregister;
 	}
@@ -371,7 +371,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 		goto err_device_unregister;
 
 	pie_irq = platform_get_irq(pdev, 1);
-	if (pie_irq < 0 || pie_irq >= NR_IRQS)
+	if (pie_irq < 0 || pie_irq >= nr_irqs)
 		goto err_free_irq;
 
 	retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
index 7dcfba1bbfe..310c10795e9 100644
--- a/drivers/rtc/rtc-x1205.c
+++ b/drivers/rtc/rtc-x1205.c
@@ -118,13 +118,13 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
 		for (i = 0; i <= 4; i++)
 			buf[i] &= 0x7F;
 
-	tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
-	tm->tm_min = BCD2BIN(buf[CCR_MIN]);
-	tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
-	tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
-	tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
-	tm->tm_year = BCD2BIN(buf[CCR_YEAR])
-			+ (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900;
+	tm->tm_sec = bcd2bin(buf[CCR_SEC]);
+	tm->tm_min = bcd2bin(buf[CCR_MIN]);
+	tm->tm_hour = bcd2bin(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
+	tm->tm_mday = bcd2bin(buf[CCR_MDAY]);
+	tm->tm_mon = bcd2bin(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
+	tm->tm_year = bcd2bin(buf[CCR_YEAR])
+			+ (bcd2bin(buf[CCR_Y2K]) * 100) - 1900;
 	tm->tm_wday = buf[CCR_WDAY];
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -174,11 +174,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
 		__func__,
 		tm->tm_sec, tm->tm_min, tm->tm_hour);
 
-	buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
-	buf[CCR_MIN] = BIN2BCD(tm->tm_min);
+	buf[CCR_SEC] = bin2bcd(tm->tm_sec);
+	buf[CCR_MIN] = bin2bcd(tm->tm_min);
 
 	/* set hour and 24hr bit */
-	buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
+	buf[CCR_HOUR] = bin2bcd(tm->tm_hour) | X1205_HR_MIL;
 
 	/* should we also set the date? */
 	if (datetoo) {
@@ -187,15 +187,15 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
 			__func__,
 			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-		buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
+		buf[CCR_MDAY] = bin2bcd(tm->tm_mday);
 
 		/* month, 1 - 12 */
-		buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1);
+		buf[CCR_MONTH] = bin2bcd(tm->tm_mon + 1);
 
 		/* year, since the rtc epoch*/
-		buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100);
+		buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100);
 		buf[CCR_WDAY] = tm->tm_wday & 0x07;
-		buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100);
+		buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100);
 	}
 
 	/* If writing alarm registers, set compare bits on registers 0-4 */
@@ -437,7 +437,7 @@ static int x1205_validate_client(struct i2c_client *client)
 			return -EIO;
 		}
 
-		value = BCD2BIN(reg & probe_limits_pattern[i].mask);
+		value = bcd2bin(reg & probe_limits_pattern[i].mask);
 
 		if (value > probe_limits_pattern[i].max ||
 			value < probe_limits_pattern[i].min) {
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index b5a868d85eb..1e5478abd90 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -337,7 +337,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 #else
 #define IRQ_MIN 9
 #if defined(__PPC)
-#define IRQ_MAX (NR_IRQS-1)
+#define IRQ_MAX (nr_irqs-1)
 #else
 #define IRQ_MAX 12
 #endif
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 69f8346aa28..5877f29a600 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -189,7 +189,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
 	.attr = {
 		.name = "mu_read",
 		.mode = S_IRUSR ,
-		.owner = THIS_MODULE,
 	},
 	.size = 1032,
 	.read = arcmsr_sysfs_iop_message_read,
@@ -199,7 +198,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
 	.attr = {
 		.name = "mu_write",
 		.mode = S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = 1032,
 	.write = arcmsr_sysfs_iop_message_write,
@@ -209,7 +207,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
 	.attr = {
 		.name = "mu_clear",
 		.mode = S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.size = 1,
 	.write = arcmsr_sysfs_iop_message_clear,
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 740bad43599..afc96e844a2 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -343,6 +343,11 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 }
 
 #ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t idescsi_proc[] = {
+	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+	{ NULL, 0, NULL, NULL }
+};
+
 #define ide_scsi_devset_get(name, field) \
 static int get_##name(ide_drive_t *drive) \
 { \
@@ -378,6 +383,16 @@ static const struct ide_proc_devset idescsi_settings[] = {
 	IDE_PROC_DEVSET(transform, 0,	 3),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
+{
+	return idescsi_proc;
+}
+
+static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
+{
+	return idescsi_settings;
+}
 #endif
 
 /*
@@ -419,13 +434,6 @@ static void ide_scsi_remove(ide_drive_t *drive)
 
 static int ide_scsi_probe(ide_drive_t *);
 
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-	{ NULL, 0, NULL, NULL }
-};
-#endif
-
 static ide_driver_t idescsi_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -439,8 +447,8 @@ static ide_driver_t idescsi_driver = {
 	.end_request		= idescsi_end_request,
 	.error                  = idescsi_atapi_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idescsi_proc,
-	.settings		= idescsi_settings,
+	.proc_entries		= ide_scsi_proc_entries,
+	.proc_devsets		= ide_scsi_proc_devsets,
 #endif
 };
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d30eb7ba018..098739deb02 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7859,7 +7859,6 @@ static struct pci_driver ipr_driver = {
 	.remove = ipr_remove,
 	.shutdown = ipr_shutdown,
 	.err_handler = &ipr_err_handler,
-	.dynids.use_driver_data = 1
 };
 
 /**
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c81921677..f25f41a499e 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
 	int have_irq;
-	uint16_t msix_vector;
+	uint32_t msix_vector;
 	uint16_t msix_entry;
 };
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2aed4721c0d..21dd182ad51 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1566,9 +1566,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			goto probe_out;
 	}
 
-	if (pci_find_aer_capability(pdev))
-		if (pci_enable_pcie_error_reporting(pdev))
-			goto probe_out;
+	/* This may fail but that's ok */
+	pci_enable_pcie_error_reporting(pdev);
 
 	host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
 	if (host == NULL) {
diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c
index 4eb3da996b3..4ad3e017213 100644
--- a/drivers/scsi/sr_vendor.c
+++ b/drivers/scsi/sr_vendor.c
@@ -223,9 +223,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
 				no_multi = 1;
 				break;
 			}
-			min = BCD2BIN(buffer[15]);
-			sec = BCD2BIN(buffer[16]);
-			frame = BCD2BIN(buffer[17]);
+			min = bcd2bin(buffer[15]);
+			sec = bcd2bin(buffer[16]);
+			frame = bcd2bin(buffer[17]);
 			sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
 			break;
 		}
@@ -252,9 +252,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
 			}
 			if (rc != 0)
 				break;
-			min = BCD2BIN(buffer[1]);
-			sec = BCD2BIN(buffer[2]);
-			frame = BCD2BIN(buffer[3]);
+			min = bcd2bin(buffer[1]);
+			sec = bcd2bin(buffer[2]);
+			frame = bcd2bin(buffer[3]);
 			sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
 			if (sector)
 				sector -= CD_MSF_OFFSET;
diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 381b12ac20e..d935b2d04f9 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -66,7 +66,6 @@
 #endif
 
 static struct m68k_serial m68k_soft[NR_PORTS];
-struct m68k_serial *IRQ_ports[NR_IRQS];
 
 static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS;
 
@@ -375,15 +374,11 @@ clear_and_return:
  */
 irqreturn_t rs_interrupt(int irq, void *dev_id)
 {
-	struct m68k_serial * info;
+	struct m68k_serial *info = dev_id;
 	m68328_uart *uart;
 	unsigned short rx;
 	unsigned short tx;
 
-	info = IRQ_ports[irq];
-	if(!info)
-	    return IRQ_NONE;
-
 	uart = &uart_addr[info->line];
 	rx = uart->urx.w;
 
@@ -1383,8 +1378,6 @@ rs68328_init(void)
 		   info->port, info->irq);
 	    printk(" is a builtin MC68328 UART\n");
 	    
-	    IRQ_ports[info->irq] = info;	/* waste of space */
-
 #ifdef CONFIG_M68VZ328
 		if (i > 0 )
 			PJSEL &= 0xCF;  /* PSW enable second port output */
@@ -1393,7 +1386,7 @@ rs68328_init(void)
 	    if (request_irq(uart_irqs[i],
 			    rs_interrupt,
 			    IRQF_DISABLED,
-			    "M68328_UART", NULL))
+			    "M68328_UART", info))
                 panic("Unable to attach 68328 serial interrupt\n");
 	}
 	local_irq_restore(flags);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 1528de23a65..303272af386 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -156,11 +156,15 @@ struct uart_8250_port {
 };
 
 struct irq_info {
-	spinlock_t		lock;
+	struct			hlist_node node;
+	int			irq;
+	spinlock_t		lock;	/* Protects list not the hash */
 	struct list_head	*head;
 };
 
-static struct irq_info irq_lists[NR_IRQS];
+#define NR_IRQ_HASH		32	/* Can be adjusted later */
+static struct hlist_head irq_lists[NR_IRQ_HASH];
+static DEFINE_MUTEX(hash_mutex);	/* Used to walk the hash */
 
 /*
  * Here we define the default xmit fifo size used for each type of UART.
@@ -1545,15 +1549,43 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
 		BUG_ON(i->head != &up->list);
 		i->head = NULL;
 	}
-
 	spin_unlock_irq(&i->lock);
+	/* List empty so throw away the hash node */
+	if (i->head == NULL) {
+		hlist_del(&i->node);
+		kfree(i);
+	}
 }
 
 static int serial_link_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct hlist_head *h;
+	struct hlist_node *n;
+	struct irq_info *i;
 	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	if (n == NULL) {
+		i = kzalloc(sizeof(struct irq_info), GFP_KERNEL);
+		if (i == NULL) {
+			mutex_unlock(&hash_mutex);
+			return -ENOMEM;
+		}
+		spin_lock_init(&i->lock);
+		i->irq = up->port.irq;
+		hlist_add_head(&i->node, h);
+	}
+	mutex_unlock(&hash_mutex);
+
 	spin_lock_irq(&i->lock);
 
 	if (i->head) {
@@ -1577,14 +1609,28 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
 
 static void serial_unlink_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct irq_info *i;
+	struct hlist_node *n;
+	struct hlist_head *h;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	BUG_ON(n == NULL);
 	BUG_ON(i->head == NULL);
 
 	if (list_empty(i->head))
 		free_irq(up->port.irq, i);
 
 	serial_do_unlink(i, up);
+	mutex_unlock(&hash_mutex);
 }
 
 /* Base timer interval for polling */
@@ -2447,7 +2493,7 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 static int
 serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS ||
 	    ser->type == PORT_STARTECH)
@@ -2967,7 +3013,7 @@ EXPORT_SYMBOL(serial8250_unregister_port);
 
 static int __init serial8250_init(void)
 {
-	int ret, i;
+	int ret;
 
 	if (nr_uarts > UART_NR)
 		nr_uarts = UART_NR;
@@ -2976,9 +3022,6 @@ static int __init serial8250_init(void)
 		"%d ports, IRQ sharing %sabled\n", nr_uarts,
 		share_irqs ? "en" : "dis");
 
-	for (i = 0; i < NR_IRQS; i++)
-		spin_lock_init(&irq_lists[i].lock);
-
 #ifdef CONFIG_SPARC
 	ret = sunserial_register_minors(&serial8250_reg, UART_NR);
 #else
@@ -3006,15 +3049,15 @@ static int __init serial8250_init(void)
 		goto out;
 
 	platform_device_del(serial8250_isa_devs);
- put_dev:
+put_dev:
 	platform_device_put(serial8250_isa_devs);
- unreg_uart_drv:
+unreg_uart_drv:
 #ifdef CONFIG_SPARC
 	sunserial_unregister_minors(&serial8250_reg, UART_NR);
 #else
 	uart_unregister_driver(&serial8250_reg);
 #endif
- out:
+out:
 	return ret;
 }
 
diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 0416ad3bc12..418b4fe9a0a 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -111,7 +111,7 @@ static struct parisc_driver serial_driver = {
 	.probe		= serial_init_chip,
 };
 
-int __init probe_serial_gsc(void)
+static int __init probe_serial_gsc(void)
 {
 	register_parisc_driver(&lasi_driver);
 	register_parisc_driver(&serial_driver);
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index db783b77a88..c94d3c4b752 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -457,7 +457,7 @@ config SERIAL_SAMSUNG
 
 config SERIAL_SAMSUNG_DEBUG
 	bool "Samsung SoC serial debug"
-	depends on SERIAL_SAMSUNG
+	depends on SERIAL_SAMSUNG && DEBUG_LL
 	help
 	  Add support for debugging the serial driver. Since this is
 	  generally being used as a console, we use our own output
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index 90b56c2c31e..71562689116 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -512,7 +512,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 9d08f27208a..b7180046f8d 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -572,7 +572,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index a6c4d744495..bde4b4b0b80 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -623,7 +623,7 @@ static int cpm_uart_verify_port(struct uart_port *port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c
index 23d03051101..611c97a1565 100644
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -922,7 +922,7 @@ static void m32r_sio_config_port(struct uart_port *port, int flags)
 static int
 m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config))
 		return -EINVAL;
@@ -1162,7 +1162,7 @@ static int __init m32r_sio_init(void)
 
 	printk(KERN_INFO "Serial: M32R SIO driver\n");
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		spin_lock_init(&irq_lists[i].lock);
 
 	ret = uart_register_driver(&m32r_sio_reg);
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 6bdf3362e3b..874786a11fe 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -741,7 +741,7 @@ static int uart_set_info(struct uart_state *state,
 	if (port->ops->verify_port)
 		retval = port->ops->verify_port(port, &new_serial);
 
-	if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) ||
+	if ((new_serial.irq >= nr_irqs) || (new_serial.irq < 0) ||
 	    (new_serial.baud_base < 9600))
 		retval = -EINVAL;
 
diff --git a/drivers/serial/serial_lh7a40x.c b/drivers/serial/serial_lh7a40x.c
index cb49a5ac022..61dc8b3daa2 100644
--- a/drivers/serial/serial_lh7a40x.c
+++ b/drivers/serial/serial_lh7a40x.c
@@ -460,7 +460,7 @@ static int lh7a40xuart_verify_port (struct uart_port* port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600) /* *** FIXME: is this true? */
 		ret = -EINVAL;
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 8fcb4c5b9a2..7313c2edcb8 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -1039,7 +1039,7 @@ static int __devinit serial_txx9_probe(struct platform_device *dev)
 		ret = serial_txx9_register_port(&port);
 		if (ret < 0) {
 			dev_err(&dev->dev, "unable to register port at index %d "
-				"(IO%x MEM%llx IRQ%d): %d\n", i,
+				"(IO%lx MEM%llx IRQ%d): %d\n", i,
 				p->iobase, (unsigned long long)p->mapbase,
 				p->irq, ret);
 		}
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3df2aaec829..f0658d2c45b 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -3,7 +3,7 @@
  *
  * SuperH on-chip serial module support.  (SCI with no FIFO / with FIFO)
  *
- *  Copyright (C) 2002 - 2006  Paul Mundt
+ *  Copyright (C) 2002 - 2008  Paul Mundt
  *  Modified to support SH7720 SCIF. Markus Brunner, Mark Jonas (Jul 2007).
  *
  * based off of the old drivers/char/sh-sci.c by:
@@ -46,6 +46,7 @@
 #include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <linux/ctype.h>
+#include <linux/err.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/clock.h>
@@ -78,7 +79,7 @@ struct sci_port {
 	struct timer_list	break_timer;
 	int			break_flag;
 
-#ifdef CONFIG_SUPERH
+#ifdef CONFIG_HAVE_CLK
 	/* Port clock */
 	struct clk		*clk;
 #endif
@@ -831,7 +832,7 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_CPU_FREQ
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_HAVE_CLK)
 /*
  * Here we define a transistion notifier so that we can update all of our
  * ports' baud rate when the peripheral clock changes.
@@ -860,7 +861,7 @@ static int sci_notifier(struct notifier_block *self,
 			 * Clean this up later..
 			 */
 			clk = clk_get(NULL, "module_clk");
-			port->uartclk = clk_get_rate(clk) * 16;
+			port->uartclk = clk_get_rate(clk);
 			clk_put(clk);
 		}
 
@@ -873,7 +874,7 @@ static int sci_notifier(struct notifier_block *self,
 }
 
 static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 };
-#endif /* CONFIG_CPU_FREQ */
+#endif /* CONFIG_CPU_FREQ && CONFIG_HAVE_CLK */
 
 static int sci_request_irq(struct sci_port *port)
 {
@@ -1008,7 +1009,7 @@ static int sci_startup(struct uart_port *port)
 	if (s->enable)
 		s->enable(port);
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	s->clk = clk_get(NULL, "module_clk");
 #endif
 
@@ -1030,7 +1031,7 @@ static void sci_shutdown(struct uart_port *port)
 	if (s->disable)
 		s->disable(port);
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	clk_put(s->clk);
 	s->clk = NULL;
 #endif
@@ -1041,24 +1042,11 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 {
 	struct sci_port *s = &sci_ports[port->line];
 	unsigned int status, baud, smr_val;
-	int t;
+	int t = -1;
 
 	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-
-	switch (baud) {
-		case 0:
-			t = -1;
-			break;
-		default:
-		{
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
-			t = SCBRR_VALUE(baud, clk_get_rate(s->clk));
-#else
-			t = SCBRR_VALUE(baud);
-#endif
-			break;
-		}
-	}
+	if (likely(baud))
+		t = SCBRR_VALUE(baud, port->uartclk);
 
 	do {
 		status = sci_in(port, SCxSR);
@@ -1113,7 +1101,7 @@ static const char *sci_type(struct uart_port *port)
 		case PORT_IRDA: return "irda";
 	}
 
-	return 0;
+	return NULL;
 }
 
 static void sci_release_port(struct uart_port *port)
@@ -1145,19 +1133,23 @@ static void sci_config_port(struct uart_port *port, int flags)
 		break;
 	}
 
-#if defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103)
-	if (port->mapbase == 0)
+	if (port->flags & UPF_IOREMAP && !port->membase) {
+#if defined(CONFIG_SUPERH64)
 		port->mapbase = onchip_remap(SCIF_ADDR_SH5, 1024, "SCIF");
-
-	port->membase = (void __iomem *)port->mapbase;
+		port->membase = (void __iomem *)port->mapbase;
+#else
+		port->membase = ioremap_nocache(port->mapbase, 0x40);
 #endif
+		if (!port->membase)	/* complain only when the remap failed */
+			printk(KERN_ERR "sci: can't remap port#%d\n", port->line);
+	}
 }
 
 static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
 	struct sci_port *s = &sci_ports[port->line];
 
-	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > NR_IRQS)
+	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq >= nr_irqs)
 		return -EINVAL;
 	if (ser->baud_base < 2400)
 		/* No paper tape reader for Mitch.. */
@@ -1207,17 +1199,17 @@ static void __init sci_init_ports(void)
 		sci_ports[i].disable	= h8300_sci_disable;
 #endif
 		sci_ports[i].port.uartclk = CONFIG_CPU_CLOCK;
-#elif defined(CONFIG_SUPERH64)
-		sci_ports[i].port.uartclk = current_cpu_data.module_clock * 16;
-#else
+#elif defined(CONFIG_HAVE_CLK)
 		/*
 		 * XXX: We should use a proper SCI/SCIF clock
 		 */
 		{
 			struct clk *clk = clk_get(NULL, "module_clk");
-			sci_ports[i].port.uartclk = clk_get_rate(clk) * 16;
+			sci_ports[i].port.uartclk = clk_get_rate(clk);
 			clk_put(clk);
 		}
+#else
+#error "Need a valid uartclk"
 #endif
 
 		sci_ports[i].break_timer.data = (unsigned long)&sci_ports[i];
@@ -1285,7 +1277,7 @@ static int __init serial_console_setup(struct console *co, char *options)
 
 	port->type = serial_console_port->type;
 
-#if defined(CONFIG_SUPERH) && !defined(CONFIG_SUPERH64)
+#ifdef CONFIG_HAVE_CLK
 	if (!serial_console_port->clk)
 		serial_console_port->clk = clk_get(NULL, "module_clk");
 #endif
@@ -1436,7 +1428,7 @@ static struct uart_driver sci_uart_driver = {
 static int __devinit sci_probe(struct platform_device *dev)
 {
 	struct plat_sci_port *p = dev->dev.platform_data;
-	int i;
+	int i, ret = -EINVAL;
 
 	for (i = 0; p && p->flags != 0; p++, i++) {
 		struct sci_port *sciport = &sci_ports[i];
@@ -1453,12 +1445,22 @@ static int __devinit sci_probe(struct platform_device *dev)
 
 		sciport->port.mapbase	= p->mapbase;
 
-		/*
-		 * For the simple (and majority of) cases where we don't need
-		 * to do any remapping, just cast the cookie directly.
-		 */
-		if (p->mapbase && !p->membase && !(p->flags & UPF_IOREMAP))
-			p->membase = (void __iomem *)p->mapbase;
+		if (p->mapbase && !p->membase) {
+			if (p->flags & UPF_IOREMAP) {
+				p->membase = ioremap_nocache(p->mapbase, 0x40);
+				if (!p->membase) {	/* ioremap fails with NULL */
+					ret = -ENOMEM;
+					goto err_unreg;
+				}
+			} else {
+				/*
+				 * For the simple (and majority of) cases
+				 * where we don't need to do any remapping,
+				 * just cast the cookie directly.
+				 */
+				p->membase = (void __iomem *)p->mapbase;
+			}
+		}
 
 		sciport->port.membase	= p->membase;
 
@@ -1479,7 +1481,7 @@ static int __devinit sci_probe(struct platform_device *dev)
 	kgdb_putchar	= kgdb_sci_putchar;
 #endif
 
-#ifdef CONFIG_CPU_FREQ
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_HAVE_CLK)
 	cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
 	dev_info(&dev->dev, "CPU frequency notifier registered\n");
 #endif
@@ -1489,6 +1491,12 @@ static int __devinit sci_probe(struct platform_device *dev)
 #endif
 
 	return 0;
+
+err_unreg:
+	for (i = i - 1; i >= 0; i--)
+		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
+
+	return ret;
 }
 
 static int __devexit sci_remove(struct platform_device *dev)
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 8a0749e34ca..7cd28b22680 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -320,18 +320,16 @@
 #define SCI_EVENT_WRITE_WAKEUP	0
 
 #define SCI_IN(size, offset)					\
-  unsigned int addr = port->mapbase + (offset);			\
   if ((size) == 8) {						\
-    return ctrl_inb(addr);					\
+    return ioread8(port->membase + (offset));			\
   } else {							\
-    return ctrl_inw(addr);					\
+    return ioread16(port->membase + (offset));			\
   }
 #define SCI_OUT(size, offset, value)				\
-  unsigned int addr = port->mapbase + (offset);			\
   if ((size) == 8) {						\
-    ctrl_outb(value, addr);					\
+    iowrite8(value, port->membase + (offset));			\
   } else if ((size) == 16) {					\
-    ctrl_outw(value, addr);					\
+    iowrite16(value, port->membase + (offset));			\
   }
 
 #define CPU_SCIx_FNS(name, sci_offset, sci_size, scif_offset, scif_size)\
@@ -791,11 +789,16 @@ static inline int sci_rxd_in(struct uart_port *port)
       defined(CONFIG_CPU_SUBTYPE_SH7721)
 #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1)
 #elif defined(CONFIG_CPU_SUBTYPE_SH7723)
-#define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(16*bps)-1)
+static inline int scbrr_calc(struct uart_port *port, int bps, int clk)
+{
+	if (port->type == PORT_SCIF)
+		return (clk+16*bps)/(32*bps)-1;
+	else
+		return ((clk*2)+16*bps)/(16*bps)-1;
+}
+#define SCBRR_VALUE(bps, clk) scbrr_calc(port, bps, clk)
 #elif defined(__H8300H__) || defined(__H8300S__)
-#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1)
-#elif defined(CONFIG_SUPERH64)
-#define SCBRR_VALUE(bps) ((current_cpu_data.module_clock+16*bps)/(32*bps)-1)
+#define SCBRR_VALUE(bps, clk) (((clk*1000/32)/bps)-1)
 #else /* Generic SH */
 #define SCBRR_VALUE(bps, clk) ((clk+16*bps)/(32*bps)-1)
 #endif
diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c
index b73e3c0056c..d5276c012f7 100644
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -61,7 +61,7 @@
 #define SN_SAL_BUFFER_SIZE (64 * (1 << 10))
 
 #define SN_SAL_UART_FIFO_DEPTH 16
-#define SN_SAL_UART_FIFO_SPEED_CPS 9600/10
+#define SN_SAL_UART_FIFO_SPEED_CPS (9600/10)
 
 /* sn_transmit_chars() calling args */
 #define TRANSMIT_BUFFERED	0
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 539c933b335..315a9333ca3 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -1066,7 +1066,7 @@ static int qe_uart_verify_port(struct uart_port *port,
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		return -EINVAL;
 
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		return -EINVAL;
 
 	if (ser->baud_base < 9600)
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index a96f4a8cfeb..6a025cefe6d 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -1,6 +1,6 @@
 #
 # Makefile for the SuperH specific drivers.
 #
-
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
 obj-$(CONFIG_MAPLE)		+= maple/
+obj-y				+= intc.o
diff --git a/arch/sh/kernel/cpu/irq/intc.c b/drivers/sh/intc.c
index 8c70e201bde..58d24c5a76c 100644
--- a/arch/sh/kernel/cpu/irq/intc.c
+++ b/drivers/sh/intc.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/bootmem.h>
+#include <linux/sh_intc.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
@@ -86,24 +87,24 @@ static inline unsigned int set_field(unsigned int value,
 
 static void write_8(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outb(set_field(0, data, h), addr);
+	__raw_writeb(set_field(0, data, h), addr);
 }
 
 static void write_16(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outw(set_field(0, data, h), addr);
+	__raw_writew(set_field(0, data, h), addr);
 }
 
 static void write_32(unsigned long addr, unsigned long h, unsigned long data)
 {
-	ctrl_outl(set_field(0, data, h), addr);
+	__raw_writel(set_field(0, data, h), addr);
 }
 
 static void modify_8(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outb(set_field(ctrl_inb(addr), data, h), addr);
+	__raw_writeb(set_field(__raw_readb(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -111,7 +112,7 @@ static void modify_16(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outw(set_field(ctrl_inw(addr), data, h), addr);
+	__raw_writew(set_field(__raw_readw(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -119,7 +120,7 @@ static void modify_32(unsigned long addr, unsigned long h, unsigned long data)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	ctrl_outl(set_field(ctrl_inl(addr), data, h), addr);
+	__raw_writel(set_field(__raw_readl(addr), data, h), addr);
 	local_irq_restore(flags);
 }
 
@@ -246,16 +247,16 @@ static void intc_mask_ack(unsigned int irq)
 		addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
 		switch (_INTC_FN(handle)) {
 		case REG_FN_MODIFY_BASE + 0:	/* 8bit */
-			ctrl_inb(addr);
-			ctrl_outb(0xff ^ set_field(0, 1, handle), addr);
+			__raw_readb(addr);
+			__raw_writeb(0xff ^ set_field(0, 1, handle), addr);
 			break;
 		case REG_FN_MODIFY_BASE + 1:	/* 16bit */
-			ctrl_inw(addr);
-			ctrl_outw(0xffff ^ set_field(0, 1, handle), addr);
+			__raw_readw(addr);
+			__raw_writew(0xffff ^ set_field(0, 1, handle), addr);
 			break;
 		case REG_FN_MODIFY_BASE + 3:	/* 32bit */
-			ctrl_inl(addr);
-			ctrl_outl(0xffffffff ^ set_field(0, 1, handle), addr);
+			__raw_readl(addr);
+			__raw_writel(0xffffffff ^ set_field(0, 1, handle), addr);
 			break;
 		default:
 			BUG();
@@ -464,9 +465,10 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 			}
 
 			fn += (pr->reg_width >> 3) - 1;
-			bit = pr->reg_width - ((j + 1) * pr->field_width);
 
-			BUG_ON(bit < 0);
+			BUG_ON((j + 1) * pr->field_width > pr->reg_width);
+
+			bit = pr->reg_width - ((j + 1) * pr->field_width);
 
 			return _INTC_MK(fn, mode,
 					intc_get_reg(d, reg_e),
@@ -531,9 +533,10 @@ static unsigned int __init intc_sense_data(struct intc_desc *desc,
 
 			fn = REG_FN_MODIFY_BASE;
 			fn += (sr->reg_width >> 3) - 1;
-			bit = sr->reg_width - ((j + 1) * sr->field_width);
 
-			BUG_ON(bit < 0);
+			BUG_ON((j + 1) * sr->field_width > sr->reg_width);
+
+			bit = sr->reg_width - ((j + 1) * sr->field_width);
 
 			return _INTC_MK(fn, 0, intc_get_reg(d, sr->reg),
 					0, sr->field_width, bit);
diff --git a/drivers/staging/go7007/Kconfig b/drivers/staging/go7007/Kconfig
index 57a121c338c..593fdb767aa 100644
--- a/drivers/staging/go7007/Kconfig
+++ b/drivers/staging/go7007/Kconfig
@@ -1,10 +1,12 @@
 config VIDEO_GO7007
 	tristate "Go 7007 support"
 	depends on VIDEO_DEV && PCI && I2C && INPUT
+	depends on SND
 	select VIDEOBUF_DMA_SG
 	select VIDEO_IR
 	select VIDEO_TUNER
 	select VIDEO_TVEEPROM
+	select SND_PCM
 	select CRC32
 	default N
 	---help---
diff --git a/drivers/staging/sxg/Kconfig b/drivers/staging/sxg/Kconfig
index 1ae35080660..6e6cf0b9ef9 100644
--- a/drivers/staging/sxg/Kconfig
+++ b/drivers/staging/sxg/Kconfig
@@ -1,6 +1,7 @@
 config SXG
 	tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support"
 	depends on PCI && NETDEV_10000
+	depends on X86
 	default n
 	help
 	  This driver supports the Alacritech SLIC Technology Non-Accelerated
diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c
index 4d74ba36c3a..37caf4d6903 100644
--- a/drivers/telephony/phonedev.c
+++ b/drivers/telephony/phonedev.c
@@ -54,7 +54,6 @@ static int phone_open(struct inode *inode, struct file *file)
 	if (minor >= PHONE_NUM_DEVICES)
 		return -ENODEV;
 
-	lock_kernel();
 	mutex_lock(&phone_lock);
 	p = phone_device[minor];
 	if (p)
@@ -81,7 +80,6 @@ static int phone_open(struct inode *inode, struct file *file)
 	fops_put(old_fops);
 end:
 	mutex_unlock(&phone_lock);
-	unlock_kernel();
 	return err;
 }
 
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 5dccf057a7d..f9b4647255a 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -47,6 +47,9 @@ static struct uio_class {
 	struct class *class;
 } *uio_class;
 
+/* Protect idr accesses */
+static DEFINE_MUTEX(minor_lock);
+
 /*
  * attributes
  */
@@ -239,7 +242,6 @@ static void uio_dev_del_attributes(struct uio_device *idev)
 
 static int uio_get_minor(struct uio_device *idev)
 {
-	static DEFINE_MUTEX(minor_lock);
 	int retval = -ENOMEM;
 	int id;
 
@@ -261,7 +263,9 @@ exit:
 
 static void uio_free_minor(struct uio_device *idev)
 {
+	mutex_lock(&minor_lock);
 	idr_remove(&uio_idr, idev->minor);
+	mutex_unlock(&minor_lock);
 }
 
 /**
@@ -305,8 +309,9 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_listener *listener;
 	int ret = 0;
 
-	lock_kernel();
+	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
+	mutex_unlock(&minor_lock);
 	if (!idev) {
 		ret = -ENODEV;
 		goto out;
@@ -332,18 +337,15 @@ static int uio_open(struct inode *inode, struct file *filep)
 		if (ret)
 			goto err_infoopen;
 	}
-	unlock_kernel();
 	return 0;
 
 err_infoopen:
-
 	kfree(listener);
-err_alloc_listener:
 
+err_alloc_listener:
 	module_put(idev->owner);
 
 out:
-	unlock_kernel();
 	return ret;
 }
 
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d343afacb0b..15a803b206b 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1111,8 +1111,8 @@ clean0:
 #ifdef DEBUG
 	debugfs_remove(ehci_debug_root);
 	ehci_debug_root = NULL;
-#endif
 err_debug:
+#endif
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 	return retval;
 }
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 6fa0b9d5559..d4cfed0b26d 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -19,7 +19,7 @@
 #include <linux/backlight.h>
 
 #include <cpu/dac.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define HP680_MAX_INTENSITY 255
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 217c5118ae9..cd5f20da738 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1002,101 +1002,132 @@ fb_blank(struct fb_info *info, int blank)
  	return ret;
 }
 
-static int 
-fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+static long
+fb_ioctl(struct file *file, unsigned int cmd,
 	 unsigned long arg)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int fbidx = iminor(inode);
-	struct fb_info *info = registered_fb[fbidx];
-	struct fb_ops *fb = info->fbops;
+	struct fb_info *info;
+	struct fb_ops *fb;
 	struct fb_var_screeninfo var;
 	struct fb_fix_screeninfo fix;
 	struct fb_con2fbmap con2fb;
 	struct fb_cmap_user cmap;
 	struct fb_event event;
 	void __user *argp = (void __user *)arg;
-	int i;
-	
-	if (!fb)
+	long ret = 0;
+
+	info = registered_fb[fbidx];
+	mutex_lock(&info->lock);
+	fb = info->fbops;
+
+	if (!fb) {
+		mutex_unlock(&info->lock);
 		return -ENODEV;
+	}
 	switch (cmd) {
 	case FBIOGET_VSCREENINFO:
-		return copy_to_user(argp, &info->var,
+		ret = copy_to_user(argp, &info->var,
 				    sizeof(var)) ? -EFAULT : 0;
+		break;
 	case FBIOPUT_VSCREENINFO:
-		if (copy_from_user(&var, argp, sizeof(var)))
-			return -EFAULT;
+		if (copy_from_user(&var, argp, sizeof(var))) {
+			ret =  -EFAULT;
+			break;
+		}
 		acquire_console_sem();
 		info->flags |= FBINFO_MISC_USEREVENT;
-		i = fb_set_var(info, &var);
+		ret = fb_set_var(info, &var);
 		info->flags &= ~FBINFO_MISC_USEREVENT;
 		release_console_sem();
-		if (i) return i;
-		if (copy_to_user(argp, &var, sizeof(var)))
-			return -EFAULT;
-		return 0;
+		if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+			ret = -EFAULT;
+		break;
 	case FBIOGET_FSCREENINFO:
-		return copy_to_user(argp, &info->fix,
+		ret = copy_to_user(argp, &info->fix,
 				    sizeof(fix)) ? -EFAULT : 0;
+		break;
 	case FBIOPUTCMAP:
 		if (copy_from_user(&cmap, argp, sizeof(cmap)))
-			return -EFAULT;
-		return (fb_set_user_cmap(&cmap, info));
+			ret = -EFAULT;
+		else
+			ret = fb_set_user_cmap(&cmap, info);
+		break;
 	case FBIOGETCMAP:
 		if (copy_from_user(&cmap, argp, sizeof(cmap)))
-			return -EFAULT;
-		return fb_cmap_to_user(&info->cmap, &cmap);
+			ret = -EFAULT;
+		else
+			ret = fb_cmap_to_user(&info->cmap, &cmap);
+		break;
 	case FBIOPAN_DISPLAY:
-		if (copy_from_user(&var, argp, sizeof(var)))
-			return -EFAULT;
+		if (copy_from_user(&var, argp, sizeof(var))) {
+			ret = -EFAULT;
+			break;
+		}
 		acquire_console_sem();
-		i = fb_pan_display(info, &var);
+		ret = fb_pan_display(info, &var);
 		release_console_sem();
-		if (i)
-			return i;
-		if (copy_to_user(argp, &var, sizeof(var)))
-			return -EFAULT;
-		return 0;
+		if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+			ret = -EFAULT;
+		break;
 	case FBIO_CURSOR:
-		return -EINVAL;
+		ret = -EINVAL;
+		break;
 	case FBIOGET_CON2FBMAP:
 		if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-			return -EFAULT;
-		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-		    return -EINVAL;
-		con2fb.framebuffer = -1;
-		event.info = info;
-		event.data = &con2fb;
-		fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event);
-		return copy_to_user(argp, &con2fb,
+			ret = -EFAULT;
+		else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
+			ret = -EINVAL;
+		else {
+			con2fb.framebuffer = -1;
+			event.info = info;
+			event.data = &con2fb;
+			fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP,
+								&event);
+			ret = copy_to_user(argp, &con2fb,
 				    sizeof(con2fb)) ? -EFAULT : 0;
+		}
+		break;
 	case FBIOPUT_CON2FBMAP:
-		if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-			return - EFAULT;
-		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-		    return -EINVAL;
-		if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX)
-		    return -EINVAL;
-		if (!registered_fb[con2fb.framebuffer])
-		    request_module("fb%d", con2fb.framebuffer);
+		if (copy_from_user(&con2fb, argp, sizeof(con2fb))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) {
+			ret = -EINVAL;
+			break;
+		}
+		if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) {
+			ret = -EINVAL;
+			break;
+		}
 		if (!registered_fb[con2fb.framebuffer])
-		    return -EINVAL;
+			request_module("fb%d", con2fb.framebuffer);
+		if (!registered_fb[con2fb.framebuffer]) {
+			ret = -EINVAL;
+			break;
+		}
 		event.info = info;
 		event.data = &con2fb;
-		return fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
+		ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
 					      &event);
+		break;
 	case FBIOBLANK:
 		acquire_console_sem();
 		info->flags |= FBINFO_MISC_USEREVENT;
-		i = fb_blank(info, arg);
+		ret = fb_blank(info, arg);
 		info->flags &= ~FBINFO_MISC_USEREVENT;
 		release_console_sem();
-		return i;
+		break;
 	default:
 		if (fb->fb_ioctl == NULL)
-			return -EINVAL;
-		return fb->fb_ioctl(info, cmd, arg);
+			ret = -ENOTTY;
+		else
+			ret = fb->fb_ioctl(info, cmd, arg);
 	}
+	mutex_unlock(&info->lock);
+	return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1150,7 +1181,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file,
 	    put_user(compat_ptr(data), &cmap->transp))
 		return -EFAULT;
 
-	err = fb_ioctl(inode, file, cmd, (unsigned long) cmap);
+	err = fb_ioctl(file, cmd, (unsigned long) cmap);
 
 	if (!err) {
 		if (copy_in_user(&cmap32->start,
@@ -1204,7 +1235,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file,
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = fb_ioctl(inode, file, cmd, (unsigned long) &fix);
+	err = fb_ioctl(file, cmd, (unsigned long) &fix);
 	set_fs(old_fs);
 
 	if (!err)
@@ -1222,7 +1253,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct fb_ops *fb = info->fbops;
 	long ret = -ENOIOCTLCMD;
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 	switch(cmd) {
 	case FBIOGET_VSCREENINFO:
 	case FBIOPUT_VSCREENINFO:
@@ -1231,7 +1262,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FBIOPUT_CON2FBMAP:
 		arg = (unsigned long) compat_ptr(arg);
 	case FBIOBLANK:
-		ret = fb_ioctl(inode, file, cmd, arg);
+		ret = fb_ioctl(file, cmd, arg);
 		break;
 
 	case FBIOGET_FSCREENINFO:
@@ -1248,7 +1279,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			ret = fb->fb_compat_ioctl(info, cmd, arg);
 		break;
 	}
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return ret;
 }
 #endif
@@ -1270,13 +1301,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		return -ENODEV;
 	if (fb->fb_mmap) {
 		int res;
-		lock_kernel();
+		mutex_lock(&info->lock);
 		res = fb->fb_mmap(info, vma);
-		unlock_kernel();
+		mutex_unlock(&info->lock);
 		return res;
 	}
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 
 	/* frame buffer memory */
 	start = info->fix.smem_start;
@@ -1285,13 +1316,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		/* memory mapped io */
 		off -= len;
 		if (info->var.accel_flags) {
-			unlock_kernel();
+			mutex_unlock(&info->lock);
 			return -EINVAL;
 		}
 		start = info->fix.mmio_start;
 		len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len);
 	}
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	start &= PAGE_MASK;
 	if ((vma->vm_end - vma->vm_start + off) > len)
 		return -EINVAL;
@@ -1315,13 +1346,13 @@ fb_open(struct inode *inode, struct file *file)
 
 	if (fbidx >= FB_MAX)
 		return -ENODEV;
-	lock_kernel();
-	if (!(info = registered_fb[fbidx]))
+	info = registered_fb[fbidx];
+	if (!info)
 		request_module("fb%d", fbidx);
-	if (!(info = registered_fb[fbidx])) {
-		res = -ENODEV;
-		goto out;
-	}
+	info = registered_fb[fbidx];
+	if (!info)
+		return -ENODEV;
+	mutex_lock(&info->lock);
 	if (!try_module_get(info->fbops->owner)) {
 		res = -ENODEV;
 		goto out;
@@ -1337,7 +1368,7 @@ fb_open(struct inode *inode, struct file *file)
 		fb_deferred_io_open(info, inode, file);
 #endif
 out:
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return res;
 }
 
@@ -1346,11 +1377,11 @@ fb_release(struct inode *inode, struct file *file)
 {
 	struct fb_info * const info = file->private_data;
 
-	lock_kernel();
+	mutex_lock(&info->lock);
 	if (info->fbops->fb_release)
 		info->fbops->fb_release(info,1);
 	module_put(info->fbops->owner);
-	unlock_kernel();
+	mutex_unlock(&info->lock);
 	return 0;
 }
 
@@ -1358,7 +1389,7 @@ static const struct file_operations fb_fops = {
 	.owner =	THIS_MODULE,
 	.read =		fb_read,
 	.write =	fb_write,
-	.ioctl =	fb_ioctl,
+	.unlocked_ioctl = fb_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = fb_compat_ioctl,
 #endif
@@ -1429,6 +1460,7 @@ register_framebuffer(struct fb_info *fb_info)
 		if (!registered_fb[i])
 			break;
 	fb_info->node = i;
+	mutex_init(&fb_info->lock);
 
 	fb_info->dev = device_create(fb_class, fb_info->device,
 				     MKDEV(FB_MAJOR, i), NULL, "fb%d", i);
diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/drivers/video/imacfb.c
+++ /dev/null
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 4c32c06579a..efff672fd7b 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -16,7 +16,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <asm/sh_mobile_lcdc.h>
+#include <video/sh_mobile_lcdc.h>
 
 #define PALETTE_NR 16
 
@@ -34,7 +34,9 @@ struct sh_mobile_lcdc_chan {
 
 struct sh_mobile_lcdc_priv {
 	void __iomem *base;
+#ifdef CONFIG_HAVE_CLK
 	struct clk *clk;
+#endif
 	unsigned long lddckr;
 	struct sh_mobile_lcdc_chan ch[2];
 };
@@ -260,6 +262,11 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 		tmp = ch->ldmt1r_value;
 		tmp |= (lcd_cfg->sync & FB_SYNC_VERT_HIGH_ACT) ? 0 : 1 << 28;
 		tmp |= (lcd_cfg->sync & FB_SYNC_HOR_HIGH_ACT) ? 0 : 1 << 27;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DWPOL) ? 1 << 26 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DIPOL) ? 1 << 25 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DAPOL) ? 1 << 24 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_HSCNT) ? 1 << 17 : 0;
+		tmp |= (ch->cfg.flags & LCDC_FLAGS_DWCNT) ? 1 << 16 : 0;
 		lcdc_write_chan(ch, LDMT1R, tmp);
 
 		/* setup SYS bus */
@@ -422,6 +429,7 @@ static int sh_mobile_lcdc_setup_clocks(struct device *dev, int clock_source,
 
 	priv->lddckr = icksel << 16;
 
+#ifdef CONFIG_HAVE_CLK
 	if (str) {
 		priv->clk = clk_get(dev, str);
 		if (IS_ERR(priv->clk)) {
@@ -431,6 +439,7 @@ static int sh_mobile_lcdc_setup_clocks(struct device *dev, int clock_source,
 
 		clk_enable(priv->clk);
 	}
+#endif
 
 	return 0;
 }
@@ -585,7 +594,6 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
-	priv->lddckr = pdata->lddckr;
 	priv->base = ioremap_nocache(res->start, (res->end - res->start) + 1);
 
 	for (i = 0; i < j; i++) {
@@ -688,10 +696,12 @@ static int sh_mobile_lcdc_remove(struct platform_device *pdev)
 		fb_dealloc_cmap(&info->cmap);
 	}
 
+#ifdef CONFIG_HAVE_CLK
 	if (priv->clk) {
 		clk_disable(priv->clk);
 		clk_put(priv->clk);
 	}
+#endif
 
 	if (priv->base)
 		iounmap(priv->base);
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index ed6b0576208..1f09d4e4144 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -80,7 +80,6 @@ static struct bin_attribute w1_ds2760_bin_attr = {
 	.attr = {
 		.name = "w1_slave",
 		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
 	},
 	.size = DS2760_DATA_SIZE,
 	.read = w1_ds2760_read_bin,
diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 05a28106e8e..8782ec1f5aa 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -154,7 +154,7 @@ static int ibwdt_set_heartbeat(int t)
 		return -EINVAL;
 
 	for (i = 0x0F; i > -1; i--)
-		if (wd_times[i] > t)
+		if (wd_times[i] >= t)
 			break;
 	wd_margin = i;
 	return 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c3290bc186a..9ce1ab6c268 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 
 	BUG_ON(irq == -1);
 #ifdef CONFIG_SMP
-	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+	irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
 #endif
 
 	__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
@@ -137,10 +137,12 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 static void init_evtchn_cpu_bindings(void)
 {
 #ifdef CONFIG_SMP
+	struct irq_desc *desc;
 	int i;
+
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].affinity = cpumask_of_cpu(0);
+	for_each_irq_desc(i, desc)
+		desc->affinity = cpumask_of_cpu(0);
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -229,12 +231,12 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for_each_irq_nr(irq)
 		if (irq_bindcount[irq] == 0)
 			break;
 
-	if (irq == NR_IRQS)
-		panic("No available IRQ to bind to: increase NR_IRQS!\n");
+	if (irq == nr_irqs)
+		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
 	return irq;
 }
@@ -790,7 +792,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for_each_irq_nr(irq)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -822,7 +824,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < NR_IRQS; i++)
+	for_each_irq_nr(i)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c061c3f18e7..24eb01087b6 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -30,8 +30,8 @@
 #include <linux/parser.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
+#include <net/9p/transport.h>
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	if (!v9ses->clnt->dotu)
 		v9ses->flags &= ~V9FS_EXTENDED;
 
-	v9ses->maxdata = v9ses->clnt->msize;
+	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
 	/* for legacy mode, fall back to V9FS_ACCESS_ANY */
 	if (!v9fs_extended(v9ses) &&
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 57997fa14e6..c295ba786ed 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -46,9 +46,11 @@ extern struct dentry_operations v9fs_cached_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
+void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
+void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
+
+ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97d3aed5798..6fcb1e7095c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,7 +38,6 @@
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
-#include "fid.h"
 
 /**
  * v9fs_vfs_readpage - read an entire page in from 9P
@@ -53,14 +52,13 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 	int retval;
 	loff_t offset;
 	char *buffer;
-	struct p9_fid *fid;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
-	fid = filp->private_data;
 	buffer = kmap(page);
 	offset = page_offset(page);
 
-	retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
+	/* v9fs_file_readn() takes the byte count first, then the offset */
+	retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
 	if (retval < 0)
 		goto done;
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e298fe19409..873cd31baa4 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -45,7 +45,7 @@
  *
  */
 
-static inline int dt_type(struct p9_stat *mistat)
+static inline int dt_type(struct p9_wstat *mistat)
 {
 	unsigned long perm = mistat->mode;
 	int rettype = DT_REG;
@@ -69,32 +69,60 @@ static inline int dt_type(struct p9_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	int over;
+	struct p9_wstat st;
+	int err;
 	struct p9_fid *fid;
-	struct v9fs_session_info *v9ses;
-	struct inode *inode;
-	struct p9_stat *st;
+	int buflen;
+	char *statbuf;
+	int n, i = 0;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
-	inode = filp->f_path.dentry->d_inode;
-	v9ses = v9fs_inode2v9ses(inode);
 	fid = filp->private_data;
-	while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
-		if (IS_ERR(st))
-			return PTR_ERR(st);
 
-		over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
-			v9fs_qid2ino(&st->qid), dt_type(st));
+	buflen = fid->clnt->msize - P9_IOHDRSZ;
+	statbuf = kmalloc(buflen, GFP_KERNEL);
+	if (!statbuf)
+		return -ENOMEM;
 
-		if (over)
+	while (1) {
+		err = v9fs_file_readn(filp, statbuf, NULL, buflen,
+								fid->rdir_fpos);
+		if (err <= 0)
 			break;
 
-		filp->f_pos += st->size;
-		kfree(st);
-		st = NULL;
+		n = err;
+		/* statbuf was just refilled: parse it from the start again */
+		i = 0;
+		while (i < n) {
+			err = p9stat_read(statbuf + i, buflen-i, &st,
+							fid->clnt->dotu);
+			if (err) {
+				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+				err = -EIO;
+				p9stat_free(&st);
+				goto free_and_exit;
+			}
+
+			i += st.size+2;
+			fid->rdir_fpos += st.size+2;
+
+			over = filldir(dirent, st.name, strlen(st.name),
+			    filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
+
+			filp->f_pos += st.size+2;
+
+			p9stat_free(&st);
+
+			if (over) {
+				err = 0;
+				goto free_and_exit;
+			}
+		}
 	}
 
-	kfree(st);
-	return 0;
+free_and_exit:
+	kfree(statbuf);
+	return err;
 }
 
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 52944d2249a..041c5269228 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 /**
- * v9fs_file_read - read from a file
+ * v9fs_file_readn - read from a file
  * @filp: file pointer to read
  * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
  * @count: size of buffer
  * @offset: offset at which to read data
  *
  */
+
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+	       u64 offset)
+{
+	int n, total;
+	struct p9_fid *fid = filp->private_data;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
+					(long long unsigned) offset, count);
+
+	n = 0;
+	total = 0;
+	do {
+		n = p9_client_read(fid, data, udata, offset, count);
+		if (n <= 0)
+			break;
+
+		if (data)
+			data += n;
+		if (udata)
+			udata += n;
+
+		offset += n;
+		count -= n;
+		total += n;
+	} while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+
+	if (n < 0)
+		total = n;
+
+	return total;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
 static ssize_t
-v9fs_file_read(struct file *filp, char __user * data, size_t count,
+v9fs_file_read(struct file *filp, char __user *udata, size_t count,
 	       loff_t * offset)
 {
 	int ret;
 	struct p9_fid *fid;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
 	fid = filp->private_data;
-	ret = p9_client_uread(fid, data, *offset, count);
+
+	if (count > (fid->clnt->msize - P9_IOHDRSZ))
+		ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
+	else
+		ret = p9_client_read(fid, NULL, udata, *offset, count);
+
 	if (ret > 0)
 		*offset += ret;
 
@@ -156,19 +205,38 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
-	int ret;
+	int n, rsize, total = 0;
 	struct p9_fid *fid;
+	struct p9_client *clnt;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	loff_t origin = *offset;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
 		(int)count, (int)*offset);
 
 	fid = filp->private_data;
-	ret = p9_client_uwrite(fid, data, *offset, count);
-	if (ret > 0) {
-		invalidate_inode_pages2_range(inode->i_mapping, *offset,
-								*offset+ret);
-		*offset += ret;
+	clnt = fid->clnt;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+		rsize = clnt->msize - P9_IOHDRSZ;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		n = p9_client_write(fid, NULL, data+total, *offset+total,
+									rsize);
+		if (n <= 0)
+			break;
+		count -= n;
+		total += n;
+	} while (count > 0);
+
+	if (total > 0) {
+		invalidate_inode_pages2_range(inode->i_mapping, origin,
+								origin+total);
+		*offset += total;
 	}
 
 	if (*offset > inode->i_size) {
@@ -176,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data,
 		inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
 	}
 
-	return ret;
+	if (n < 0)
+		return n;
+
+	return total;
 }
 
 static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e83aa5ebe86..8314d3f43b7 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 {
 	int err, umode;
 	struct inode *ret;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	ret = NULL;
 	st = p9_client_stat(fid);
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	struct p9_fid *dfid, *ofid, *fid;
 	struct inode *inode;
 
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+
 	err = 0;
 	ofid = NULL;
 	fid = NULL;
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	dfid = v9fs_fid_clone(dentry->d_parent);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
 		dfid = NULL;
 		goto error;
 	}
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	ofid = p9_client_walk(dfid, 0, NULL, 1);
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		ofid = NULL;
 		goto error;
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
-	if (err < 0)
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
 		goto error;
+	}
 
 	/* now walk from the parent so we can get unopened fid */
 	fid = p9_client_walk(dfid, 1, &name, 0);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		fid = NULL;
 		goto error;
 	} else
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
 
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	int err;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	err = -EPERM;
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  */
 
 void
-v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
+v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
-	int n;
 	char ext[32];
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 		int major = -1;
 		int minor = -1;
 
-		n = stat->extension.len;
-		if (n > sizeof(ext)-1)
-			n = sizeof(ext)-1;
-		memmove(ext, stat->extension.str, n);
-		ext[n] = 0;
+		strlcpy(ext, stat->extension, sizeof(ext));
 		sscanf(ext, "%c %u %u", &type, &major, &minor);
 		switch (type) {
 		case 'c':
@@ -857,10 +860,11 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 			break;
 		default:
 			P9_DPRINTK(P9_DEBUG_ERROR,
-				"Unknown special type %c (%.*s)\n", type,
-				stat->extension.len, stat->extension.str);
+				"Unknown special type %c %s\n", type,
+				stat->extension);
 		};
 		inode->i_rdev = MKDEV(major, minor);
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	} else
 		inode->i_rdev = 0;
 
@@ -904,7 +908,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
 	retval = -EPERM;
@@ -926,15 +930,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	}
 
 	/* copy extension buffer into buffer */
-	if (st->extension.len < buflen)
-		buflen = st->extension.len + 1;
-
-	memmove(buffer, st->extension.str, buflen - 1);
-	buffer[buflen-1] = 0;
+	strncpy(buffer, st->extension, buflen);
 
 	P9_DPRINTK(P9_DEBUG_VFS,
-		"%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
-		st->extension.str, buffer);
+		"%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
 
 	retval = buflen;
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf59c396049..d6cb1a0ca72 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	struct inode *inode = NULL;
 	struct dentry *root = NULL;
 	struct v9fs_session_info *v9ses = NULL;
-	struct p9_stat *st = NULL;
+	struct p9_wstat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
 	uid_t uid = current->fsuid;
 	gid_t gid = current->fsgid;
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	sb->s_root = root;
 	root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+
 	v9fs_stat2inode(st, root->d_inode, sb);
+
 	v9fs_fid_add(root, fid);
+	p9stat_free(st);
 	kfree(st);
 
+	P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
 	return simple_set_mnt(mnt, sb);
 
 release_sb:
diff --git a/fs/Kconfig b/fs/Kconfig
index d0a1174fb51..e46297f020c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,61 +6,9 @@ menu "File systems"
 
 if BLOCK
 
-config EXT2_FS
-	tristate "Second extended fs support"
-	help
-	  Ext2 is a standard Linux file system for hard disks.
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext2.
-
-	  If unsure, say Y.
-
-config EXT2_FS_XATTR
-	bool "Ext2 extended attributes"
-	depends on EXT2_FS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config EXT2_FS_POSIX_ACL
-	bool "Ext2 POSIX Access Control Lists"
-	depends on EXT2_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT2_FS_SECURITY
-	bool "Ext2 Security Labels"
-	depends on EXT2_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT2_FS_XIP
-	bool "Ext2 execute in place support"
-	depends on EXT2_FS && MMU
-	help
-	  Execute in place can be used on memory-backed block devices. If you
-	  enable this option, you can select to mount block devices which are
-	  capable of this feature without using the page cache.
-
-	  If you do not use a block device that is capable of using this,
-	  or if unsure, say N.
+source "fs/ext2/Kconfig"
+source "fs/ext3/Kconfig"
+source "fs/ext4/Kconfig"
 
 config FS_XIP
 # execute in place
@@ -68,218 +16,8 @@ config FS_XIP
 	depends on EXT2_FS_XIP
 	default y
 
-config EXT3_FS
-	tristate "Ext3 journalling file system support"
-	select JBD
-	help
-	  This is the journalling version of the Second extended file system
-	  (often called ext3), the de facto standard Linux file system
-	  (method to organize files on a storage device) for hard disks.
-
-	  The journalling code included in this driver means you do not have
-	  to run e2fsck (file system checker) on your file systems after a
-	  crash.  The journal keeps track of any changes that were being made
-	  at the time the system crashed, and can ensure that your file system
-	  is consistent without the need for a lengthy check.
-
-	  Other than adding the journal to the file system, the on-disk format
-	  of ext3 is identical to ext2.  It is possible to freely switch
-	  between using the ext3 driver and the ext2 driver, as long as the
-	  file system has been cleanly unmounted, or e2fsck is run on the file
-	  system.
-
-	  To add a journal on an existing ext2 file system or change the
-	  behavior of ext3 file systems, you can use the tune2fs utility ("man
-	  tune2fs").  To modify attributes of files and directories on ext3
-	  file systems, use chattr ("man chattr").  You need to be using
-	  e2fsprogs version 1.20 or later in order to create ext3 journals
-	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext3.
-
-config EXT3_FS_XATTR
-	bool "Ext3 extended attributes"
-	depends on EXT3_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext3.
-
-config EXT3_FS_POSIX_ACL
-	bool "Ext3 POSIX Access Control Lists"
-	depends on EXT3_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT3_FS_SECURITY
-	bool "Ext3 Security Labels"
-	depends on EXT3_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext3 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT4_FS
-	tristate "The Extended 4 (ext4) filesystem"
-	select JBD2
-	select CRC16
-	help
-	  This is the next generation of the ext3 filesystem.
-
-	  Unlike the change from ext2 filesystem to ext3 filesystem,
-	  the on-disk format of ext4 is not forwards compatible with
-	  ext3; it is based on extent maps and it supports 48-bit
-	  physical block numbers.  The ext4 filesystem also supports delayed
-	  allocation, persistent preallocation, high resolution time stamps,
-	  and a number of other features to improve performance and speed
-	  up fsck time.  For more information, please see the web pages at
-	  http://ext4.wiki.kernel.org.
-
-	  The ext4 filesystem will support mounting an ext3
-	  filesystem; while there will be some performance gains from
-	  the delayed allocation and inode table readahead, the best
-	  performance gains will require enabling ext4 features in the
-	  filesystem, or formating a new filesystem as an ext4
-	  filesystem initially.
-
-	  To compile this file system support as a module, choose M here. The
-	  module will be called ext4.
-
-	  If unsure, say N.
-
-config EXT4DEV_COMPAT
-	bool "Enable ext4dev compatibility"
-	depends on EXT4_FS
-	help
-	  Starting with 2.6.28, the name of the ext4 filesystem was
-	  renamed from ext4dev to ext4.  Unfortunately there are some
-	  legacy userspace programs (such as klibc's fstype) have
-	  "ext4dev" hardcoded.
-
-	  To enable backwards compatibility so that systems that are
-	  still expecting to mount ext4 filesystems using ext4dev,
-	  chose Y here.   This feature will go away by 2.6.31, so
-	  please arrange to get your userspace programs fixed!
-
-config EXT4_FS_XATTR
-	bool "Ext4 extended attributes"
-	depends on EXT4_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext4.
-
-config EXT4_FS_POSIX_ACL
-	bool "Ext4 POSIX Access Control Lists"
-	depends on EXT4_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the POSIX ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT4_FS_SECURITY
-	bool "Ext4 Security Labels"
-	depends on EXT4_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext4 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JBD
-	tristate
-	help
-	  This is a generic journalling layer for block devices.  It is
-	  currently used by the ext3 file system, but it could also be
-	  used to add journal support to other file systems or block
-	  devices such as RAID or LVM.
-
-	  If you are using the ext3 file system, you need to say Y here.
-	  If you are not using ext3 then you will probably want to say N.
-
-	  To compile this device as a module, choose M here: the module will be
-	  called jbd.  If you are compiling ext3 into the kernel, you
-	  cannot compile this code as a module.
-
-config JBD_DEBUG
-	bool "JBD (ext3) debugging support"
-	depends on JBD && DEBUG_FS
-	help
-	  If you are using the ext3 journaled file system (or potentially any
-	  other file system/device using JBD), this option allows you to
-	  enable debugging output while the system is running, in order to
-	  help track down any problems you are having.  By default the
-	  debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
-	  number between 1 and 5, the higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
-
-config JBD2
-	tristate
-	select CRC32
-	help
-	  This is a generic journaling layer for block devices that support
-	  both 32-bit and 64-bit block numbers.  It is currently used by
-	  the ext4 and OCFS2 filesystems, but it could also be used to add
-	  journal support to other file systems or block devices such
-	  as RAID or LVM.
-
-	  If you are using ext4 or OCFS2, you need to say Y here.
-	  If you are not using ext4 or OCFS2 then you will
-	  probably want to say N.
-
-	  To compile this device as a module, choose M here. The module will be
-	  called jbd2.  If you are compiling ext4 or OCFS2 into the kernel,
-	  you cannot compile this code as a module.
-
-config JBD2_DEBUG
-	bool "JBD2 (ext4) debugging support"
-	depends on JBD2 && DEBUG_FS
-	help
-	  If you are using the ext4 journaled file system (or
-	  potentially any other filesystem/device using JBD2), this option
-	  allows you to enable debugging output while the system is running,
-	  in order to help track down any problems you are having.
-	  By default, the debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
-	  number between 1 and 5. The higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
+source "fs/jbd/Kconfig"
+source "fs/jbd2/Kconfig"
 
 config FS_MBCACHE
 # Meta block cache for Extended Attributes (ext2/ext3/ext4)
@@ -665,7 +403,7 @@ config AUTOFS4_FS
 	  N here.
 
 config FUSE_FS
-	tristate "Filesystem in Userspace support"
+	tristate "FUSE (Filesystem in Userspace) support"
 	help
 	  With FUSE it is possible to implement a fully functional filesystem
 	  in a userspace program.
@@ -1168,195 +906,7 @@ config EFS_FS
 	  To compile the EFS file system support as a module, choose M here: the
 	  module will be called efs.
 
-config JFFS2_FS
-	tristate "Journalling Flash File System v2 (JFFS2) support"
-	select CRC32
-	depends on MTD
-	help
-	  JFFS2 is the second generation of the Journalling Flash File System
-	  for use on diskless embedded devices. It provides improved wear
-	  levelling, compression and support for hard links. You cannot use
-	  this on normal block devices, only on 'MTD' devices.
-
-	  Further information on the design and implementation of JFFS2 is
-	  available at <http://sources.redhat.com/jffs2/>.
-
-config JFFS2_FS_DEBUG
-	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
-	depends on JFFS2_FS
-	default "0"
-	help
-	  This controls the amount of debugging messages produced by the JFFS2
-	  code. Set it to zero for use in production systems. For evaluation,
-	  testing and debugging, it's advisable to set it to one. This will
-	  enable a few assertions and will print debugging messages at the
-	  KERN_DEBUG loglevel, where they won't normally be visible. Level 2
-	  is unlikely to be useful - it enables extra debugging in certain
-	  areas which at one point needed debugging, but when the bugs were
-	  located and fixed, the detailed messages were relegated to level 2.
-
-	  If reporting bugs, please try to have available a full dump of the
-	  messages at debug level 1 while the misbehaviour was occurring.
-
-config JFFS2_FS_WRITEBUFFER
-	bool "JFFS2 write-buffering support"
-	depends on JFFS2_FS
-	default y
-	help
-	  This enables the write-buffering support in JFFS2.
-
-	  This functionality is required to support JFFS2 on the following
-	  types of flash devices:
-	    - NAND flash
-	    - NOR flash with transparent ECC
-	    - DataFlash
-
-config JFFS2_FS_WBUF_VERIFY
-	bool "Verify JFFS2 write-buffer reads"
-	depends on JFFS2_FS_WRITEBUFFER
-	default n
-	help
-	  This causes JFFS2 to read back every page written through the
-	  write-buffer, and check for errors.
-
-config JFFS2_SUMMARY
-	bool "JFFS2 summary support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  This feature makes it possible to use summary information
-	  for faster filesystem mount.
-
-	  The summary information can be inserted into a filesystem image
-	  by the utility 'sumtool'.
-
-	  If unsure, say 'N'.
-
-config JFFS2_FS_XATTR
-	bool "JFFS2 XATTR support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config JFFS2_FS_POSIX_ACL
-	bool "JFFS2 POSIX Access Control Lists"
-	depends on JFFS2_FS_XATTR
-	default y
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config JFFS2_FS_SECURITY
-	bool "JFFS2 Security Labels"
-	depends on JFFS2_FS_XATTR
-	default y
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the jffs2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JFFS2_COMPRESSION_OPTIONS
-	bool "Advanced compression options for JFFS2"
-	depends on JFFS2_FS
-	default n
-	help
-	  Enabling this option allows you to explicitly choose which
-	  compression modules, if any, are enabled in JFFS2. Removing
-	  compressors can mean you cannot read existing file systems,
-	  and enabling experimental compressors can mean that you
-	  write a file system which cannot be read by a standard kernel.
-
-	  If unsure, you should _definitely_ say 'N'.
-
-config JFFS2_ZLIB
-	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
-	select ZLIB_INFLATE
-	select ZLIB_DEFLATE
-	depends on JFFS2_FS
-	default y
-	help
-	  Zlib is designed to be a free, general-purpose, legally unencumbered,
-	  lossless data-compression library for use on virtually any computer
-	  hardware and operating system. See <http://www.gzip.org/zlib/> for
-	  further information.
-
-	  Say 'Y' if unsure.
-
-config JFFS2_LZO
-	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
-	depends on JFFS2_FS
-	default n
-	help
-	  minilzo-based compression. Generally works better than Zlib.
-
-	  This feature was added in July, 2007. Say 'N' if you need
-	  compatibility with older bootloaders or kernels.
-
-config JFFS2_RTIME
-	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default y
-	help
-	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
-
-config JFFS2_RUBIN
-	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default n
-	help
-	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
-
-choice
-	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
-	default JFFS2_CMODE_PRIORITY
-	depends on JFFS2_FS
-	help
-	  You can set here the default compression mode of JFFS2 from
-	  the available compression modes. Don't touch if unsure.
-
-config JFFS2_CMODE_NONE
-	bool "no compression"
-	help
-	  Uses no compression.
-
-config JFFS2_CMODE_PRIORITY
-	bool "priority"
-	help
-	  Tries the compressors in a predefined order and chooses the first
-	  successful one.
-
-config JFFS2_CMODE_SIZE
-	bool "size (EXPERIMENTAL)"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result.
-
-config JFFS2_CMODE_FAVOURLZO
-	bool "Favour LZO"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result but gives some preference to LZO (which has faster
-	  decompression) at the expense of size.
-
-endchoice
-
+source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
 
@@ -1913,148 +1463,7 @@ config SMB_NLS_REMOTE
 
 	  smbmount from samba 2.2.0 or later supports this.
 
-config CIFS
-	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
-	depends on INET
-	select NLS
-	help
-	  This is the client VFS module for the Common Internet File System
-	  (CIFS) protocol which is the successor to the Server Message Block 
-	  (SMB) protocol, the native file sharing mechanism for most early
-	  PC operating systems.  The CIFS protocol is fully supported by 
-	  file servers such as Windows 2000 (including Windows 2003, NT 4  
-	  and Windows XP) as well by Samba (which provides excellent CIFS
-	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar servers is provided as
-	  well.
-
-	  The cifs module provides an advanced network file system
-	  client for mounting to CIFS compliant servers.  It includes
-	  support for DFS (hierarchical name space), secure per-user
-	  session establishment via Kerberos or NTLM or NTLMv2,
-	  safe distributed caching (oplock), optional packet
-	  signing, Unicode and other internationalization improvements.
-	  If you need to mount to Samba or Windows from this machine, say Y.
-
-config CIFS_STATS
-        bool "CIFS statistics"
-        depends on CIFS
-        help
-          Enabling this option will cause statistics for each server share
-	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
-config CIFS_STATS2
-	bool "Extended statistics"
-	depends on CIFS_STATS
-	help
-	  Enabling this option will allow more detailed statistics on SMB
-	  request timing to be displayed in /proc/fs/cifs/DebugData and also
-	  allow optional logging of slow responses to dmesg (depending on the
-	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-	  These additional statistics may have a minor effect on performance
-	  and memory utilization.
-
-	  Unless you are a developer or are doing network performance analysis
-	  or tuning, say N.
-
-config CIFS_WEAK_PW_HASH
-	bool "Support legacy servers which use weaker LANMAN security"
-	depends on CIFS
-	help
-	  Modern CIFS servers including Samba and most Windows versions
-	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
-	  security mechanisms. These hash the password more securely
-	  than the mechanisms used in the older LANMAN version of the
-	  SMB protocol but LANMAN based authentication is needed to
-	  establish sessions with some old SMB servers.
-
-	  Enabling this option allows the cifs module to mount to older
-	  LANMAN based servers such as OS/2 and Windows 95, but such
-	  mounts may be less secure than mounts using NTLM or more recent
-	  security mechanisms if you are on a public network.  Unless you
-	  have a need to access old SMB servers (and are on a private
-	  network) you probably want to say N.  Even if this support
-	  is enabled in the kernel build, LANMAN authentication will not be
-	  used automatically. At runtime LANMAN mounts are disabled but
-	  can be set to required (or optional) either in
-	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-	  option on the mount command. This support is disabled by
-	  default in order to reduce the possibility of a downgrade
-	  attack.
-
-	  If unsure, say N.
-
-config CIFS_UPCALL
-	  bool "Kerberos/SPNEGO advanced session setup"
-	  depends on CIFS && KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which accesses
-	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	    Kerberos tickets which are needed to mount to certain secure servers
-	    (for which more secure Kerberos authentication is required). If
-	    unsure, say N.
-
-config CIFS_XATTR
-        bool "CIFS extended attributes"
-        depends on CIFS
-        help
-          Extended attributes are name:value pairs associated with inodes by
-          the kernel or by users (see the attr(5) manual page, or visit
-          <http://acl.bestbits.at/> for details).  CIFS maps the name of
-          extended attributes beginning with the user namespace prefix
-          to SMB/CIFS EAs. EAs are stored on Windows servers without the
-          user namespace prefix, but their names are seen by Linux cifs clients
-          prefaced by the user namespace prefix. The system namespace
-          (used by some filesystems to store ACLs) is not supported at
-          this time.
-
-          If unsure, say N.
-
-config CIFS_POSIX
-        bool "CIFS POSIX Extensions"
-        depends on CIFS_XATTR
-        help
-          Enabling this option will cause the cifs client to attempt to
-	  negotiate a newer dialect with servers, such as Samba 3.0.5
-	  or later, that optionally can handle more POSIX like (rather
-	  than Windows like) file behavior.  It also enables
-	  support for POSIX ACLs (getfacl and setfacl) to servers
-	  (such as Samba 3.10 and later) which can negotiate
-	  CIFS POSIX ACL support.  If unsure, say N.
-
-config CIFS_DEBUG2
-	bool "Enable additional CIFS debugging routines"
-	depends on CIFS
-	help
-	   Enabling this option adds a few more debugging routines
-	   to the cifs code which slightly increases the size of
-	   the cifs module and can cause additional logging of debug
-	   messages in some error paths, slowing performance. This
-	   option can be turned off unless you are debugging
-	   cifs problems.  If unsure, say N.
-
-config CIFS_EXPERIMENTAL
-	  bool "CIFS Experimental Features (EXPERIMENTAL)"
-	  depends on CIFS && EXPERIMENTAL
-	  help
-	    Enables cifs features under testing. These features are
-	    experimental and currently include DFS support and directory 
-	    change notification ie fcntl(F_DNOTIFY), as well as the upcall
-	    mechanism which will be used for Kerberos session negotiation
-	    and uid remapping.  Some of these features also may depend on 
-	    setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-	    (which is disabled by default). See the file fs/cifs/README 
-	    for more details.  If unsure, say N.
-
-config CIFS_DFS_UPCALL
-	  bool "DFS feature support (EXPERIMENTAL)"
-	  depends on CIFS_EXPERIMENTAL
-	  depends on KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which contacts userspace
-	    helper utilities to provide server name resolution (host names to
-	    IP addresses) which is needed for implicit mounts of DFS junction
-	    points. If unsure, say N.
+source "fs/cifs/Kconfig"
 
 config NCP_FS
 	tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 801db134181..ce9fb3fbfae 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
 
 	  It is also possible to run FDPIC ELF binaries on MMU linux also.
 
+config CORE_DUMP_DEFAULT_ELF_HEADERS
+	bool "Write ELF core dumps with partial segments"
+	default n
+	depends on BINFMT_ELF
+	help
+	  ELF core dump files describe each memory mapping of the crashed
+	  process, and can contain or omit the memory contents of each one.
+	  The contents of an unmodified text mapping are omitted by default.
+
+	  For an unmodified text mapping of an ELF object, including just
+	  the first page of the file in a core dump makes it possible to
+	  identify the build ID bits in the file, without paying the i/o
+	  cost and disk space to dump all the text.  However, versions of
+	  GDB before 6.7 are confused by ELF core dump files in this format.
+
+	  The core dump behavior can be controlled per process using
+	  the /proc/PID/coredump_filter pseudo-file; this setting is
+	  inherited.  See Documentation/filesystems/proc.txt for details.
+
+	  This config option changes the default setting of coredump_filter
+	  seen at boot time.  If unsure, say N.
+
 config BINFMT_FLAT
 	bool "Kernel support for flat binaries"
 	depends on !MMU && (!FRV || BROKEN)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c76afa26edf..8fcfa398d35 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
 static unsigned long vma_dump_size(struct vm_area_struct *vma,
 				   unsigned long mm_flags)
 {
+#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
+
 	/* The vma can be set up to tell us the answer directly.  */
 	if (vma->vm_flags & VM_ALWAYSDUMP)
 		goto whole;
 
+	/* Hugetlb memory check */
+	if (vma->vm_flags & VM_HUGETLB) {
+		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+			goto whole;
+		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+			goto whole;
+	}
+
 	/* Do not dump I/O mapped devices or special mappings */
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
-#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
-
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
@@ -1333,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0e8367c5462..5b5424cb339 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3..6569fda5cfe 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
 
 void unlock_buffer(struct buffer_head *bh)
 {
-	smp_mb__before_clear_bit();
-	clear_buffer_locked(bh);
+	clear_bit_unlock(BH_Lock, &bh->b_state);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bh->b_state, BH_Lock);
 }
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 00000000000..341a98965bd
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
+config CIFS
+	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+	depends on INET
+	select NLS
+	help
+	  This is the client VFS module for the Common Internet File System
+	  (CIFS) protocol which is the successor to the Server Message Block
+	  (SMB) protocol, the native file sharing mechanism for most early
+	  PC operating systems.  The CIFS protocol is fully supported by
+	  file servers such as Windows 2000 (including Windows 2003, NT 4
+	  and Windows XP) as well as by Samba (which provides excellent CIFS
+	  server support for Linux and many other operating systems). Limited
+	  support for OS/2 and Windows ME and similar servers is provided as
+	  well.
+
+	  The cifs module provides an advanced network file system
+	  client for mounting to CIFS compliant servers.  It includes
+	  support for DFS (hierarchical name space), secure per-user
+	  session establishment via Kerberos or NTLM or NTLMv2,
+	  safe distributed caching (oplock), optional packet
+	  signing, Unicode and other internationalization improvements.
+	  If you need to mount to Samba or Windows from this machine, say Y.
+
+config CIFS_STATS
+        bool "CIFS statistics"
+        depends on CIFS
+        help
+          Enabling this option will cause statistics for each server share
+	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+
+config CIFS_STATS2
+	bool "Extended statistics"
+	depends on CIFS_STATS
+	help
+	  Enabling this option will allow more detailed statistics on SMB
+	  request timing to be displayed in /proc/fs/cifs/DebugData and also
+	  allow optional logging of slow responses to dmesg (depending on the
+	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+	  These additional statistics may have a minor effect on performance
+	  and memory utilization.
+
+	  Unless you are a developer or are doing network performance analysis
+	  or tuning, say N.
+
+config CIFS_WEAK_PW_HASH
+	bool "Support legacy servers which use weaker LANMAN security"
+	depends on CIFS
+	help
+	  Modern CIFS servers including Samba and most Windows versions
+	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+	  security mechanisms. These hash the password more securely
+	  than the mechanisms used in the older LANMAN version of the
+	  SMB protocol but LANMAN based authentication is needed to
+	  establish sessions with some old SMB servers.
+
+	  Enabling this option allows the cifs module to mount to older
+	  LANMAN based servers such as OS/2 and Windows 95, but such
+	  mounts may be less secure than mounts using NTLM or more recent
+	  security mechanisms if you are on a public network.  Unless you
+	  have a need to access old SMB servers (and are on a private
+	  network) you probably want to say N.  Even if this support
+	  is enabled in the kernel build, LANMAN authentication will not be
+	  used automatically. At runtime LANMAN mounts are disabled but
+	  can be set to required (or optional) either in
+	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+	  option on the mount command. This support is disabled by
+	  default in order to reduce the possibility of a downgrade
+	  attack.
+
+	  If unsure, say N.
+
+config CIFS_UPCALL
+	  bool "Kerberos/SPNEGO advanced session setup"
+	  depends on CIFS && KEYS
+	  help
+	    Enables an upcall mechanism for CIFS which accesses
+	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	    Kerberos tickets which are needed to mount to certain secure servers
+	    (for which more secure Kerberos authentication is required). If
+	    unsure, say N.
+
+config CIFS_XATTR
+        bool "CIFS extended attributes"
+        depends on CIFS
+        help
+          Extended attributes are name:value pairs associated with inodes by
+          the kernel or by users (see the attr(5) manual page, or visit
+          <http://acl.bestbits.at/> for details).  CIFS maps the name of
+          extended attributes beginning with the user namespace prefix
+          to SMB/CIFS EAs. EAs are stored on Windows servers without the
+          user namespace prefix, but their names are seen by Linux cifs clients
+          prefaced by the user namespace prefix. The system namespace
+          (used by some filesystems to store ACLs) is not supported at
+          this time.
+
+          If unsure, say N.
+
+config CIFS_POSIX
+        bool "CIFS POSIX Extensions"
+        depends on CIFS_XATTR
+        help
+          Enabling this option will cause the cifs client to attempt to
+	  negotiate a newer dialect with servers, such as Samba 3.0.5
+	  or later, that optionally can handle more POSIX like (rather
+	  than Windows like) file behavior.  It also enables
+	  support for POSIX ACLs (getfacl and setfacl) to servers
+	  (such as Samba 3.10 and later) which can negotiate
+	  CIFS POSIX ACL support.  If unsure, say N.
+
+config CIFS_DEBUG2
+	bool "Enable additional CIFS debugging routines"
+	depends on CIFS
+	help
+	   Enabling this option adds a few more debugging routines
+	   to the cifs code which slightly increases the size of
+	   the cifs module and can cause additional logging of debug
+	   messages in some error paths, slowing performance. This
+	   option can be turned off unless you are debugging
+	   cifs problems.  If unsure, say N.
+
+config CIFS_EXPERIMENTAL
+	  bool "CIFS Experimental Features (EXPERIMENTAL)"
+	  depends on CIFS && EXPERIMENTAL
+	  help
+	    Enables cifs features under testing. These features are
+	    experimental and currently include DFS support and directory
+	    change notification, i.e. fcntl(F_DNOTIFY), as well as the upcall
+	    mechanism which will be used for Kerberos session negotiation
+	    and uid remapping.  Some of these features also may depend on
+	    setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+	    (which is disabled by default). See the file fs/cifs/README
+	    for more details.  If unsure, say N.
+
+config CIFS_DFS_UPCALL
+	  bool "DFS feature support (EXPERIMENTAL)"
+	  depends on CIFS_EXPERIMENTAL
+	  depends on KEYS
+	  help
+	    Enables an upcall mechanism for CIFS which contacts userspace
+	    helper utilities to provide server name resolution (host names to
+	    IP addresses) which is needed for implicit mounts of DFS junction
+	    points. If unsure, say N.
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a060512..62d8bd8f14c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
 		SetPageUptodate(page);
 		unlock_page(page);
 		if (!pagevec_add(plru_pvec, page))
-			__pagevec_lru_add(plru_pvec);
+			__pagevec_lru_add_file(plru_pvec);
 		data += PAGE_CACHE_SIZE;
 	}
 	return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		bytes_read = 0;
 	}
 
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
 	if (smb_read_data) {
diff --git a/fs/exec.c b/fs/exec.c
index a41e7902ed0..4e834f16d9d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, int nr_threads, long signr)
+static int format_corename(char *corename, long signr)
 {
 	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
 	 * If core_pattern does not include a %p (as is the default)
 	 * and core_uses_pid is set, then .%pid will be appended to
 	 * the filename. Do not do this for piped commands. */
-	if (!ispipe && !pid_in_pattern
-	    && (core_uses_pid || nr_threads)) {
+	if (!ispipe && !pid_in_pattern && core_uses_pid) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
 			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * uses lock_kernel()
 	 */
  	lock_kernel();
-	ispipe = format_corename(corename, retval, signr);
+	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 	/*
 	 * Don't bother to check the RLIMIT_CORE value if core_pattern points
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
new file mode 100644
index 00000000000..14a6780fd03
--- /dev/null
+++ b/fs/ext2/Kconfig
@@ -0,0 +1,55 @@
+config EXT2_FS
+	tristate "Second extended fs support"
+	help
+	  Ext2 is a standard Linux file system for hard disks.
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called ext2.
+
+	  If unsure, say Y.
+
+config EXT2_FS_XATTR
+	bool "Ext2 extended attributes"
+	depends on EXT2_FS
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config EXT2_FS_POSIX_ACL
+	bool "Ext2 POSIX Access Control Lists"
+	depends on EXT2_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
+config EXT2_FS_SECURITY
+	bool "Ext2 Security Labels"
+	depends on EXT2_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS && MMU
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
new file mode 100644
index 00000000000..8e0cfe44b0f
--- /dev/null
+++ b/fs/ext3/Kconfig
@@ -0,0 +1,67 @@
+config EXT3_FS
+	tristate "Ext3 journalling file system support"
+	select JBD
+	help
+	  This is the journalling version of the Second extended file system
+	  (often called ext3), the de facto standard Linux file system
+	  (method to organize files on a storage device) for hard disks.
+
+	  The journalling code included in this driver means you do not have
+	  to run e2fsck (file system checker) on your file systems after a
+	  crash.  The journal keeps track of any changes that were being made
+	  at the time the system crashed, and can ensure that your file system
+	  is consistent without the need for a lengthy check.
+
+	  Other than adding the journal to the file system, the on-disk format
+	  of ext3 is identical to ext2.  It is possible to freely switch
+	  between using the ext3 driver and the ext2 driver, as long as the
+	  file system has been cleanly unmounted, or e2fsck is run on the file
+	  system.
+
+	  To add a journal on an existing ext2 file system or change the
+	  behavior of ext3 file systems, you can use the tune2fs utility ("man
+	  tune2fs").  To modify attributes of files and directories on ext3
+	  file systems, use chattr ("man chattr").  You need to be using
+	  e2fsprogs version 1.20 or later in order to create ext3 journals
+	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called ext3.
+
+config EXT3_FS_XATTR
+	bool "Ext3 extended attributes"
+	depends on EXT3_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on ext3.
+
+config EXT3_FS_POSIX_ACL
+	bool "Ext3 POSIX Access Control Lists"
+	depends on EXT3_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
+config EXT3_FS_SECURITY
+	bool "Ext3 Security Labels"
+	depends on EXT3_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext3 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6e..f5b57a2ca35 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
+		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1585,7 +1586,7 @@ retry_alloc:
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (free_blocks <= (windowsz/2))
+		if (my_rsv && (free_blocks <= (windowsz/2)))
 			continue;
 
 		brelse(bitmap_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78e..4c82531ea0a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
 	int err;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
+	int dir_has_error = 0;
 
 	sb = inode->i_sb;
 
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
 		 * of recovering data when there's a bad sector
 		 */
 		if (!bh) {
-			ext3_error (sb, "ext3_readdir",
-				"directory #%lu contains a hole at offset %lu",
-				inode->i_ino, (unsigned long)filp->f_pos);
+			if (!dir_has_error) {
+				ext3_error(sb, __func__, "directory #%lu "
+					"contains a hole at offset %lld",
+					inode->i_ino, filp->f_pos);
+				dir_has_error = 1;
+			}
 			/* corrupt size?  Maybe no more blocks to read */
 			if (filp->f_pos > inode->i_blocks << 9)
 				break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
 				get_dtype(sb, fname->file_type));
 		if (error) {
 			filp->f_pos = curr_pos;
-			info->extra_fname = fname->next;
+			info->extra_fname = fname;
 			return error;
 		}
 		fname = fname->next;
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp,
 	 * If there are any leftover names on the hash collision
 	 * chain, return them first.
 	 */
-	if (info->extra_fname &&
-	    call_filldir(filp, dirent, filldir, info->extra_fname))
-		goto finished;
+	if (info->extra_fname) {
+		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+			goto finished;
 
-	if (!info->curr_node)
+		info->extra_fname = NULL;
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT3_HTREE_EOF;
+				goto finished;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	} else if (!info->curr_node)
 		info->curr_node = rb_first(&info->root);
 
 	while (1) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148..f8424ad8997 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
 		ext3_journal_stop(handle);
 		unlock_page(page);
 		page_cache_release(page);
+		/*
+		 * block_write_begin may have instantiated a few blocks
+		 * outside i_size.  Trim these off again. Don't need
+		 * i_size_read because we hold i_mutex.
+		 */
+		if (pos + len > inode->i_size)
+			vmtruncate(inode, inode->i_size);
 	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e9..78fdf383637 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 
 	if (reserved_gdb || gdb_off == 0) {
 		if (!EXT3_HAS_COMPAT_FEATURE(sb,
-					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+					     EXT3_FEATURE_COMPAT_RESIZE_INODE)
+		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
 			ext3_warning(sb, __func__,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c55..3a260af5544 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
+	if (test_opt(sb, DATA_ERR_ABORT))
+		seq_puts(seq, ",data_err=abort");
+
 	ext3_show_quota_options(seq, sb);
 
 	return 0;
@@ -754,6 +757,7 @@ enum {
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+	Opt_data_err_abort, Opt_data_err_ignore,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
 	{Opt_data_journal, "data=journal"},
 	{Opt_data_ordered, "data=ordered"},
 	{Opt_data_writeback, "data=writeback"},
+	{Opt_data_err_abort, "data_err=abort"},
+	{Opt_data_err_ignore, "data_err=ignore"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
+		case Opt_data_err_abort:
+			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+			break;
+		case Opt_data_err_ignore:
+			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
 			qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
 		journal->j_flags |= JFS_BARRIER;
 	else
 		journal->j_flags &= ~JFS_BARRIER;
+	if (test_opt(sb, DATA_ERR_ABORT))
+		journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
+	else
+		journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
 	spin_unlock(&journal->j_state_lock);
 }
 
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 00000000000..7505482a08f
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
+config EXT4_FS
+	tristate "The Extended 4 (ext4) filesystem"
+	select JBD2
+	select CRC16
+	help
+	  This is the next generation of the ext3 filesystem.
+
+	  Unlike the change from ext2 filesystem to ext3 filesystem,
+	  the on-disk format of ext4 is not forwards compatible with
+	  ext3; it is based on extent maps and it supports 48-bit
+	  physical block numbers.  The ext4 filesystem also supports delayed
+	  allocation, persistent preallocation, high resolution time stamps,
+	  and a number of other features to improve performance and speed
+	  up fsck time.  For more information, please see the web pages at
+	  http://ext4.wiki.kernel.org.
+
+	  The ext4 filesystem will support mounting an ext3
+	  filesystem; while there will be some performance gains from
+	  the delayed allocation and inode table readahead, the best
+	  performance gains will require enabling ext4 features in the
+	  filesystem, or formatting a new filesystem as an ext4
+	  filesystem initially.
+
+	  To compile this file system support as a module, choose M here. The
+	  module will be called ext4.
+
+	  If unsure, say N.
+
+config EXT4DEV_COMPAT
+	bool "Enable ext4dev compatibility"
+	depends on EXT4_FS
+	help
+	  Starting with 2.6.28, the name of the ext4 filesystem was
+	  renamed from ext4dev to ext4.  Unfortunately there are some
+	  legacy userspace programs (such as klibc's fstype) that have
+	  "ext4dev" hardcoded.
+
+	  To enable backwards compatibility for systems that are
+	  still expecting to mount ext4 filesystems using ext4dev,
+	  choose Y here.  This feature will go away by 2.6.31, so
+	  please arrange to get your userspace programs fixed!
+
+config EXT4_FS_XATTR
+	bool "Ext4 extended attributes"
+	depends on EXT4_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on ext4.
+
+config EXT4_FS_POSIX_ACL
+	bool "Ext4 POSIX Access Control Lists"
+	depends on EXT4_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
+config EXT4_FS_SECURITY
+	bool "Ext4 Security Labels"
+	depends on EXT4_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext4 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc31..34930a964b8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
 		file->f_op = &fuse_direct_io_file_operations;
 	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
+	if (outarg->open_flags & FOPEN_NONSEEKABLE)
+		nonseekable_open(inode, file);
 	ff->fh = outarg->fh;
 	file->private_data = fuse_file_get(ff);
 }
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	switch (origin) {
 	case SEEK_END:
+		retval = fuse_update_attributes(inode, NULL, file, NULL);
+		if (retval)
+			return retval;
 		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd..35accfdd747 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
   See the file COPYING.
 */
 
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
 #include <linux/fuse.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacf..54b1f0e1ef5 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (is_bdev) {
 		fc->destroy_req = fuse_request_alloc();
 		if (!fc->destroy_req)
-			goto err_put_root;
+			goto err_free_init_req;
 	}
 
 	mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err_unlock:
 	mutex_unlock(&fuse_mutex);
+ err_free_init_req:
 	fuse_request_free(init_req);
  err_put_root:
 	dput(root_dentry);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227f..0022eec63cd 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 		goto done;
 	}
 
+	if (inode->i_ino == HFSPLUS_EXT_CNID)
+		return -EIO;
+
 	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	res = hfsplus_ext_read_extent(inode, ablock);
 	if (!res) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b6..963be644297 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
+	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+		return -EOVERFLOW;
 	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 00000000000..4e28beeed15
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
+config JBD
+	tristate
+	help
+	  This is a generic journalling layer for block devices.  It is
+	  currently used by the ext3 file system, but it could also be
+	  used to add journal support to other file systems or block
+	  devices such as RAID or LVM.
+
+	  If you are using the ext3 file system, you need to say Y here.
+	  If you are not using ext3 then you will probably want to say N.
+
+	  To compile this device as a module, choose M here: the module will be
+	  called jbd.  If you are compiling ext3 into the kernel, you
+	  cannot compile this code as a module.
+
+config JBD_DEBUG
+	bool "JBD (ext3) debugging support"
+	depends on JBD && DEBUG_FS
+	help
+	  If you are using the ext3 journaled file system (or potentially any
+	  other file system/device using JBD), this option allows you to
+	  enable debugging output while the system is running, in order to
+	  help track down any problems you are having.  By default the
+	  debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+	  number between 1 and 5, the higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e75..25719d902c5 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
 		printk(KERN_WARNING
 			"JBD: Detected IO errors while flushing file data "
 			"on %s\n", bdevname(journal->j_fs_dev, b));
+		if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+			journal_abort(journal, err);
 		err = 0;
 	}
 
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
 		jh = commit_transaction->t_buffers;
 
 		/* If we're in abort mode, we just un-journal the buffer and
-		   release it for background writing. */
+		   release it. */
 
 		if (is_journal_aborted(journal)) {
+			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
 			journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
 		/* AKPM: bforget here */
 	}
 
+	if (err)
+		journal_abort(journal, err);
+
 	jbd_debug(3, "JBD: commit phase 6\n");
 
 	if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
 		if (buffer_jbddirty(bh)) {
 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
 			__journal_insert_checkpoint(jh, commit_transaction);
+			if (is_journal_aborted(journal))
+				clear_buffer_jbddirty(bh);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 			__journal_refile_buffer(jh);
 			jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a44..d15cd6e7251 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
 	struct journal_head *jh;
+	int ret = 0;
 
 	if (is_handle_aborted(handle))
-		return 0;
+		return ret;
 
 	jh = journal_add_journal_head(bh);
 	JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				   time if it is redirtied */
 			}
 
-			/* journal_clean_data_list() may have got there first */
+			/*
+			 * We cannot remove the buffer with io error from the
+			 * committing transaction, because otherwise it would
+			 * miss the error and the commit would not abort.
+			 */
+			if (unlikely(!buffer_uptodate(bh))) {
+				ret = -EIO;
+				goto no_journal;
+			}
+
 			if (jh->b_transaction != NULL) {
 				JBUFFER_TRACE(jh, "unfile from commit");
 				__journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
 	}
 	JBUFFER_TRACE(jh, "exit");
 	journal_put_journal_head(jh);
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
new file mode 100644
index 00000000000..f32f346f4b0
--- /dev/null
+++ b/fs/jbd2/Kconfig
@@ -0,0 +1,33 @@
+config JBD2
+	tristate
+	select CRC32
+	help
+	  This is a generic journaling layer for block devices that support
+	  both 32-bit and 64-bit block numbers.  It is currently used by
+	  the ext4 and OCFS2 filesystems, but it could also be used to add
+	  journal support to other file systems or block devices such
+	  as RAID or LVM.
+
+	  If you are using ext4 or OCFS2, you need to say Y here.
+	  If you are not using ext4 or OCFS2 then you will
+	  probably want to say N.
+
+	  To compile this device as a module, choose M here. The module will be
+	  called jbd2.  If you are compiling ext4 or OCFS2 into the kernel,
+	  you cannot compile this code as a module.
+
+config JBD2_DEBUG
+	bool "JBD2 (ext4) debugging support"
+	depends on JBD2 && DEBUG_FS
+	help
+	  If you are using the ext4 journaled file system (or
+	  potentially any other filesystem/device using JBD2), this option
+	  allows you to enable debugging output while the system is running,
+	  in order to help track down any problems you are having.
+	  By default, the debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
+	  number between 1 and 5. The higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 00000000000..6ae169cd8fa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
+config JFFS2_FS
+	tristate "Journalling Flash File System v2 (JFFS2) support"
+	select CRC32
+	depends on MTD
+	help
+	  JFFS2 is the second generation of the Journalling Flash File System
+	  for use on diskless embedded devices. It provides improved wear
+	  levelling, compression and support for hard links. You cannot use
+	  this on normal block devices, only on 'MTD' devices.
+
+	  Further information on the design and implementation of JFFS2 is
+	  available at <http://sources.redhat.com/jffs2/>.
+
+config JFFS2_FS_DEBUG
+	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
+	depends on JFFS2_FS
+	default "0"
+	help
+	  This controls the amount of debugging messages produced by the JFFS2
+	  code. Set it to zero for use in production systems. For evaluation,
+	  testing and debugging, it's advisable to set it to one. This will
+	  enable a few assertions and will print debugging messages at the
+	  KERN_DEBUG loglevel, where they won't normally be visible. Level 2
+	  is unlikely to be useful - it enables extra debugging in certain
+	  areas which at one point needed debugging, but when the bugs were
+	  located and fixed, the detailed messages were relegated to level 2.
+
+	  If reporting bugs, please try to have available a full dump of the
+	  messages at debug level 1 while the misbehaviour was occurring.
+
+config JFFS2_FS_WRITEBUFFER
+	bool "JFFS2 write-buffering support"
+	depends on JFFS2_FS
+	default y
+	help
+	  This enables the write-buffering support in JFFS2.
+
+	  This functionality is required to support JFFS2 on the following
+	  types of flash devices:
+	    - NAND flash
+	    - NOR flash with transparent ECC
+	    - DataFlash
+
+config JFFS2_FS_WBUF_VERIFY
+	bool "Verify JFFS2 write-buffer reads"
+	depends on JFFS2_FS_WRITEBUFFER
+	default n
+	help
+	  This causes JFFS2 to read back every page written through the
+	  write-buffer, and check for errors.
+
+config JFFS2_SUMMARY
+	bool "JFFS2 summary support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  This feature makes it possible to use summary information
+	  for faster filesystem mount.
+
+	  The summary information can be inserted into a filesystem image
+	  by the utility 'sumtool'.
+
+	  If unsure, say 'N'.
+
+config JFFS2_FS_XATTR
+	bool "JFFS2 XATTR support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config JFFS2_FS_POSIX_ACL
+	bool "JFFS2 POSIX Access Control Lists"
+	depends on JFFS2_FS_XATTR
+	default y
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config JFFS2_FS_SECURITY
+	bool "JFFS2 Security Labels"
+	depends on JFFS2_FS_XATTR
+	default y
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the jffs2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config JFFS2_COMPRESSION_OPTIONS
+	bool "Advanced compression options for JFFS2"
+	depends on JFFS2_FS
+	default n
+	help
+	  Enabling this option allows you to explicitly choose which
+	  compression modules, if any, are enabled in JFFS2. Removing
+	  compressors can mean you cannot read existing file systems,
+	  and enabling experimental compressors can mean that you
+	  write a file system which cannot be read by a standard kernel.
+
+	  If unsure, you should _definitely_ say 'N'.
+
+config JFFS2_ZLIB
+	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
+	depends on JFFS2_FS
+	default y
+	help
+	  Zlib is designed to be a free, general-purpose, legally unencumbered,
+	  lossless data-compression library for use on virtually any computer
+	  hardware and operating system. See <http://www.gzip.org/zlib/> for
+	  further information.
+
+	  Say 'Y' if unsure.
+
+config JFFS2_LZO
+	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	depends on JFFS2_FS
+	default n
+	help
+	  minilzo-based compression. Generally works better than Zlib.
+
+	  This feature was added in July, 2007. Say 'N' if you need
+	  compatibility with older bootloaders or kernels.
+
+config JFFS2_RTIME
+	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default y
+	help
+	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
+
+config JFFS2_RUBIN
+	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default n
+	help
+	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
+
+choice
+	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+	default JFFS2_CMODE_PRIORITY
+	depends on JFFS2_FS
+	help
+	  You can set here the default compression mode of JFFS2 from
+	  the available compression modes. Don't touch if unsure.
+
+config JFFS2_CMODE_NONE
+	bool "no compression"
+	help
+	  Uses no compression.
+
+config JFFS2_CMODE_PRIORITY
+	bool "priority"
+	help
+	  Tries the compressors in a predefined order and chooses the first
+	  successful one.
+
+config JFFS2_CMODE_SIZE
+	bool "size (EXPERIMENTAL)"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result.
+
+config JFFS2_CMODE_FAVOURLZO
+	bool "Favour LZO"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result but gives some preference to LZO (which has faster
+	  decompression) at the expense of size.
+
+endchoice
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b3..f25e70c1b51 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
 }
 
 /* jffs2_compress:
- * @data: Pointer to uncompressed data
- * @cdata: Pointer to returned pointer to buffer for compressed data
+ * @data_in: Pointer to uncompressed data
+ * @cpage_out: Pointer to returned pointer to buffer for compressed data
  * @datalen: On entry, holds the amount of data available for compression.
  *	On exit, expected to hold the amount of data actually compressed.
  * @cdatalen: On entry, holds the amount of space available for compressed
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef5525..b1aaae823a5 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	/* FIXME: If you care. We'd need to use frags for the target
 	   if it grows much more than this */
 	if (targetlen > 254)
-		return -EINVAL;
+		return -ENAMETOOLONG;
 
 	ri = jffs2_alloc_raw_inode();
 
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2..259461b910a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	instr->len = c->sector_size;
 	instr->callback = jffs2_erase_callback;
 	instr->priv = (unsigned long)(&instr[1]);
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	((struct erase_priv_struct *)instr->priv)->jeb = jeb;
 	((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
 {
 	/* For NAND, if the failure did not occur at the device level for a
 	   specific physical page, don't bother updating the bad block table. */
-	if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) {
+	if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
 		/* We had a device-level failure to erase.  Let's see if we've
 		   failed too many times. */
 		if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c4383022..249305d65d5 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;
 	buf->f_ffree = 0;
 	buf->f_namelen = JFFS2_MAX_NAME_LEN;
+	buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
+	buf->f_fsid.val[1] = c->mtd->index;
 
 	spin_lock(&c->erase_completion_lock);
 	avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
 
 	memset(ri, 0, sizeof(*ri));
 	/* Set OS-specific defaults for new inodes */
-	ri->uid = cpu_to_je16(current->fsuid);
+	ri->uid = cpu_to_je16(current_fsuid());
 
 	if (dir_i->i_mode & S_ISGID) {
 		ri->gid = cpu_to_je16(dir_i->i_gid);
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
 	} else {
-		ri->gid = cpu_to_je16(current->fsgid);
+		ri->gid = cpu_to_je16(current_fsgid());
 	}
 
 	/* POSIX ACLs have to be processed now, at least partly.
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1b..0875b60b4bf 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
 
 	jffs2_sum_reset_collected(c->summary); /* reset collected summary */
 
+	/* adjust write buffer offset, else we get a non contiguous write bug */
+	if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
+		c->wbuf_ofs = 0xffffffff;
+
 	D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
 
 	return 0;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e..d9a721e6db7 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 
 	memset(c->wbuf,0xff,c->wbuf_pagesize);
 	/* adjust write buffer offset, else we get a non contiguous write bug */
-	if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
-		c->wbuf_ofs += c->wbuf_pagesize;
-	else
-		c->wbuf_ofs = 0xffffffff;
+	c->wbuf_ofs += c->wbuf_pagesize;
 	c->wbuf_len = 0;
 	return 0;
 }
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6a09760c596..c2e9cfd9e5a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport;
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
 
+/*
+ * If the kernel has IPv6 support available, always listen for
+ * both AF_INET and AF_INET6 requests.
+ */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static const sa_family_t	nfs_callback_family = AF_INET6;
+#else
+static const sa_family_t	nfs_callback_family = AF_INET;
+#endif
+
 static int param_set_port(const char *val, struct kernel_param *kp)
 {
 	char *endp;
@@ -106,7 +116,7 @@ int nfs_callback_up(void)
 	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
 		goto out;
 	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
-				AF_INET, NULL);
+				nfs_callback_family, NULL);
 	ret = -ENOMEM;
 	if (!serv)
 		goto out_err;
@@ -116,7 +126,8 @@ int nfs_callback_up(void)
 	if (ret <= 0)
 		goto out_err;
 	nfs_callback_tcpport = ret;
-	dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
+	dprintk("NFS: Callback listener port = %u (af %u)\n",
+			nfs_callback_tcpport, nfs_callback_family);
 
 	nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
 	if (IS_ERR(nfs_callback_info.rqst)) {
@@ -149,8 +160,8 @@ out:
 	mutex_unlock(&nfs_callback_mutex);
 	return ret;
 out_err:
-	dprintk("Couldn't create callback socket or server thread; err = %d\n",
-		ret);
+	dprintk("NFS: Couldn't create callback socket or server thread; "
+		"err = %d\n", ret);
 	nfs_callback_info.users--;
 	goto out;
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecb..efdba2e802d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
 							GFP_KERNEL)) {
 		pagevec_add(&lru_pvec, page);
-		pagevec_lru_add(&lru_pvec);
+		pagevec_lru_add_file(&lru_pvec);
 		SetPageUptodate(page);
 		unlock_page(page);
 	} else
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c910413eaec..83e700a2b0c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		struct nfs_open_context *ctx;
 
 		ctx = nfs_file_open_context(sattr->ia_file);
-		cred = ctx->cred;
-		state = ctx->state;
+		if (ctx) {
+			cred = ctx->cred;
+			state = ctx->state;
+		}
 	}
 
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8b28b95c9e4..a3b0061dfd4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2459,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -2544,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d423..3140a4429af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
 			pages[nr] = *cached_page;
 			page_cache_get(*cached_page);
 			if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
-				__pagevec_lru_add(lru_pvec);
+				__pagevec_lru_add_file(lru_pvec);
 			*cached_page = NULL;
 		}
 		index++;
@@ -2084,7 +2084,7 @@ err_out:
 						OSYNC_METADATA|OSYNC_DATA);
 		}
   	}
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
 			written ? "written" : "status", (unsigned long)written,
 			(long)status);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f4bc0e78953..bb9f4b05703 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
+			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime = cputime_add(utime, task_utime(t));
-				stime = cputime_add(stime, task_stime(t));
 				gtime = cputime_add(gtime, task_gtime(t));
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime = cputime_add(utime, sig->utime);
-			stime = cputime_add(stime, sig->stime);
+			thread_group_cputime(task, &cputime);
+			utime = cputime.utime;
+			stime = cputime.stime;
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef0..7ea52c79b2d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -136,6 +137,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 	unsigned long allowed;
 	struct vmalloc_info vmi;
 	long cached;
+	unsigned long pages[NR_LRU_LISTS];
+	int lru;
 
 /*
  * display in kilobytes.
@@ -154,51 +157,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 
 	get_vmalloc_info(&vmi);
 
+	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+		pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
 	/*
 	 * Tagged format, for easy grepping and expansion.
 	 */
 	len = sprintf(page,
-		"MemTotal:     %8lu kB\n"
-		"MemFree:      %8lu kB\n"
-		"Buffers:      %8lu kB\n"
-		"Cached:       %8lu kB\n"
-		"SwapCached:   %8lu kB\n"
-		"Active:       %8lu kB\n"
-		"Inactive:     %8lu kB\n"
+		"MemTotal:       %8lu kB\n"
+		"MemFree:        %8lu kB\n"
+		"Buffers:        %8lu kB\n"
+		"Cached:         %8lu kB\n"
+		"SwapCached:     %8lu kB\n"
+		"Active:         %8lu kB\n"
+		"Inactive:       %8lu kB\n"
+		"Active(anon):   %8lu kB\n"
+		"Inactive(anon): %8lu kB\n"
+		"Active(file):   %8lu kB\n"
+		"Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+		"Unevictable:    %8lu kB\n"
+		"Mlocked:        %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
-		"HighTotal:    %8lu kB\n"
-		"HighFree:     %8lu kB\n"
-		"LowTotal:     %8lu kB\n"
-		"LowFree:      %8lu kB\n"
+		"HighTotal:      %8lu kB\n"
+		"HighFree:       %8lu kB\n"
+		"LowTotal:       %8lu kB\n"
+		"LowFree:        %8lu kB\n"
 #endif
-		"SwapTotal:    %8lu kB\n"
-		"SwapFree:     %8lu kB\n"
-		"Dirty:        %8lu kB\n"
-		"Writeback:    %8lu kB\n"
-		"AnonPages:    %8lu kB\n"
-		"Mapped:       %8lu kB\n"
-		"Slab:         %8lu kB\n"
-		"SReclaimable: %8lu kB\n"
-		"SUnreclaim:   %8lu kB\n"
-		"PageTables:   %8lu kB\n"
+		"SwapTotal:      %8lu kB\n"
+		"SwapFree:       %8lu kB\n"
+		"Dirty:          %8lu kB\n"
+		"Writeback:      %8lu kB\n"
+		"AnonPages:      %8lu kB\n"
+		"Mapped:         %8lu kB\n"
+		"Slab:           %8lu kB\n"
+		"SReclaimable:   %8lu kB\n"
+		"SUnreclaim:     %8lu kB\n"
+		"PageTables:     %8lu kB\n"
 #ifdef CONFIG_QUICKLIST
-		"Quicklists:   %8lu kB\n"
+		"Quicklists:     %8lu kB\n"
 #endif
-		"NFS_Unstable: %8lu kB\n"
-		"Bounce:       %8lu kB\n"
-		"WritebackTmp: %8lu kB\n"
-		"CommitLimit:  %8lu kB\n"
-		"Committed_AS: %8lu kB\n"
-		"VmallocTotal: %8lu kB\n"
-		"VmallocUsed:  %8lu kB\n"
-		"VmallocChunk: %8lu kB\n",
+		"NFS_Unstable:   %8lu kB\n"
+		"Bounce:         %8lu kB\n"
+		"WritebackTmp:   %8lu kB\n"
+		"CommitLimit:    %8lu kB\n"
+		"Committed_AS:   %8lu kB\n"
+		"VmallocTotal:   %8lu kB\n"
+		"VmallocUsed:    %8lu kB\n"
+		"VmallocChunk:   %8lu kB\n",
 		K(i.totalram),
 		K(i.freeram),
 		K(i.bufferram),
 		K(cached),
 		K(total_swapcache_pages),
-		K(global_page_state(NR_ACTIVE)),
-		K(global_page_state(NR_INACTIVE)),
+		K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
+		K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+		K(pages[LRU_ACTIVE_ANON]),
+		K(pages[LRU_INACTIVE_ANON]),
+		K(pages[LRU_ACTIVE_FILE]),
+		K(pages[LRU_INACTIVE_FILE]),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		K(pages[LRU_UNEVICTABLE]),
+		K(global_page_state(NR_MLOCK)),
+#endif
 #ifdef CONFIG_HIGHMEM
 		K(i.totalhigh),
 		K(i.freehigh),
@@ -500,17 +522,13 @@ static const struct file_operations proc_vmalloc_operations = {
 
 static int show_stat(struct seq_file *p, void *v)
 {
-	int i;
+	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	cputime64_t guest;
 	u64 sum = 0;
 	struct timespec boottime;
-	unsigned int *per_irq_sum;
-
-	per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
-	if (!per_irq_sum)
-		return -ENOMEM;
+	unsigned int per_irq_sum;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -519,8 +537,6 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		int j;
-
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -530,11 +546,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < NR_IRQS; j++) {
-			unsigned int temp = kstat_cpu(i).irqs[j];
-			sum += temp;
-			per_irq_sum[j] += temp;
-		}
+
+		for_each_irq_nr(j)
+			sum += kstat_irqs_cpu(j, i);
+
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -576,8 +591,15 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < NR_IRQS; i++)
-		seq_printf(p, " %u", per_irq_sum[i]);
+	/* re-read per-IRQ counts; they may have changed since the sum above */
+	for_each_irq_nr(j) {
+		per_irq_sum = 0;
+
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
+
+		seq_printf(p, " %u", per_irq_sum);
+	}
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -591,7 +613,6 @@ static int show_stat(struct seq_file *p, void *v)
 		nr_running(),
 		nr_iowait());
 
-	kfree(per_irq_sum);
 	return 0;
 }
 
@@ -630,15 +651,14 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-	return (*pos <= NR_IRQS) ? pos : NULL;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
+
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
-	if (*pos > NR_IRQS)
-		return NULL;
-	return pos;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
@@ -646,7 +666,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
 	/* Nothing to do */
 }
 
-
 static const struct seq_operations int_seq_ops = {
 	.start = int_seq_start,
 	.next  = int_seq_next,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a2..cd9ca67f841 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 struct proc_dir_entry *proc_vmcore = NULL;
 
 /* Reads a page from the oldmem device from given offset. */
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
 	int rc = 0;
 
 	/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
-	if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+	if (!(is_vmcore_usable()))
 		return rc;
 	rc = parse_crash_elf_headers();
 	if (rc) {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125a..76acdbc3461 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 			goto add_error;
 
 		if (!pagevec_add(&lru_pvec, page))
-			__pagevec_lru_add(&lru_pvec);
+			__pagevec_lru_add_file(&lru_pvec);
 
 		unlock_page(page);
 	}
 
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 	return 0;
 
  fsize_exceeded:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e4..f031d1c925f 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mapping->a_ops = &ramfs_aops;
 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
 		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+		mapping_set_unevictable(inode->i_mapping);
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a93..eba2eabcd2b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 
 int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
 {
-	size_t len = bitmap_scnprintf_len(nr_bits);
+	if (m->count < m->size) {
+		int len = bitmap_scnprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
+	}
+	m->count = m->size;
+	return -1;
+}
+EXPORT_SYMBOL(seq_bitmap);
 
-	if (m->count + len < m->size) {
-		bitmap_scnprintf(m->buf + m->count, m->size - m->count,
-				 bits, nr_bits);
-		m->count += len;
-		return 0;
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+		unsigned int nr_bits)
+{
+	if (m->count < m->size) {
+		int len = bitmap_scnlistprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
 	}
 	m->count = m->size;
 	return -1;
 }
+EXPORT_SYMBOL(seq_bitmap_list);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 73db464cd08..1a4973e1066 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c)
 	 *    @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
 	 *    @c->lst.taken_empty_lebs
 	 *
-	 * @empty_lebs are available because they are empty. @freeable_cnt are
-	 * available because they contain only free and dirty space and the
-	 * index allocation always occurs after wbufs are synch'ed.
-	 * @idx_gc_cnt are available because they are index LEBs that have been
-	 * garbage collected (including trivial GC) and are awaiting the commit
-	 * before they can be unmapped - note that the in-the-gaps method will
-	 * grab these if it needs them. @taken_empty_lebs are empty_lebs that
-	 * have already been allocated for some purpose (also includes those
-	 * LEBs on the @idx_gc list).
+	 * @c->lst.empty_lebs are available because they are empty.
+	 * @c->freeable_cnt are available because they contain only free and
+	 * dirty space, @c->idx_gc_cnt are available because they are index
+	 * LEBs that have been garbage collected and are awaiting the commit
+	 * before they can be used. And the in-the-gaps method will grab these
+	 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
+	 * already been allocated for some purpose.
 	 *
-	 * Note, @taken_empty_lebs may temporarily be higher by one because of
-	 * the way we serialize LEB allocations and budgeting. See a comment in
-	 * 'ubifs_find_free_space()'.
+	 * Note, @c->idx_gc_cnt is included in both @c->lst.empty_lebs (because
+	 * these LEBs are empty) and in @c->lst.taken_empty_lebs (because they
+	 * are taken until after the commit).
+	 *
+	 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
+	 * because of the way we serialize LEB allocations and budgeting. See a
+	 * comment in 'ubifs_find_free_space()'.
 	 */
 	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
 	       c->lst.taken_empty_lebs;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 5bb51dac3c1..a0ada596b17 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
  *
  * Note, if the input buffer was not compressed, it is copied to the output
  * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
- *
- * This functions returns %0 on success or a negative error code on failure.
  */
 void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 		    int *compr_type)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d7f7645779f..7186400750e 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
 {
 	const struct ubifs_inode *ui = ubifs_inode(inode);
 
-	printk(KERN_DEBUG "inode      %lu\n", inode->i_ino);
-	printk(KERN_DEBUG "size       %llu\n",
+	printk(KERN_DEBUG "Dump in-memory inode:\n");
+	printk(KERN_DEBUG "\tinode          %lu\n", inode->i_ino);
+	printk(KERN_DEBUG "\tsize           %llu\n",
 	       (unsigned long long)i_size_read(inode));
-	printk(KERN_DEBUG "nlink      %u\n", inode->i_nlink);
-	printk(KERN_DEBUG "uid        %u\n", (unsigned int)inode->i_uid);
-	printk(KERN_DEBUG "gid        %u\n", (unsigned int)inode->i_gid);
-	printk(KERN_DEBUG "atime      %u.%u\n",
+	printk(KERN_DEBUG "\tnlink          %u\n", inode->i_nlink);
+	printk(KERN_DEBUG "\tuid            %u\n", (unsigned int)inode->i_uid);
+	printk(KERN_DEBUG "\tgid            %u\n", (unsigned int)inode->i_gid);
+	printk(KERN_DEBUG "\tatime          %u.%u\n",
 	       (unsigned int)inode->i_atime.tv_sec,
 	       (unsigned int)inode->i_atime.tv_nsec);
-	printk(KERN_DEBUG "mtime      %u.%u\n",
+	printk(KERN_DEBUG "\tmtime          %u.%u\n",
 	       (unsigned int)inode->i_mtime.tv_sec,
 	       (unsigned int)inode->i_mtime.tv_nsec);
-	printk(KERN_DEBUG "ctime       %u.%u\n",
+	printk(KERN_DEBUG "\tctime          %u.%u\n",
 	       (unsigned int)inode->i_ctime.tv_sec,
 	       (unsigned int)inode->i_ctime.tv_nsec);
-	printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
-	printk(KERN_DEBUG "xattr_size  %u\n", ui->xattr_size);
-	printk(KERN_DEBUG "xattr_cnt   %u\n", ui->xattr_cnt);
-	printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
-	printk(KERN_DEBUG "dirty       %u\n", ui->dirty);
-	printk(KERN_DEBUG "xattr       %u\n", ui->xattr);
-	printk(KERN_DEBUG "flags       %d\n", ui->flags);
-	printk(KERN_DEBUG "compr_type  %d\n", ui->compr_type);
-	printk(KERN_DEBUG "data_len    %d\n", ui->data_len);
+	printk(KERN_DEBUG "\tcreat_sqnum    %llu\n", ui->creat_sqnum);
+	printk(KERN_DEBUG "\txattr_size     %u\n", ui->xattr_size);
+	printk(KERN_DEBUG "\txattr_cnt      %u\n", ui->xattr_cnt);
+	printk(KERN_DEBUG "\txattr_names    %u\n", ui->xattr_names);
+	printk(KERN_DEBUG "\tdirty          %u\n", ui->dirty);
+	printk(KERN_DEBUG "\txattr          %u\n", ui->xattr);
+	printk(KERN_DEBUG "\tbulk_read      %u\n", ui->bulk_read);
+	printk(KERN_DEBUG "\tsynced_i_size  %llu\n",
+	       (unsigned long long)ui->synced_i_size);
+	printk(KERN_DEBUG "\tui_size        %llu\n",
+	       (unsigned long long)ui->ui_size);
+	printk(KERN_DEBUG "\tflags          %d\n", ui->flags);
+	printk(KERN_DEBUG "\tcompr_type     %d\n", ui->compr_type);
+	printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
+	printk(KERN_DEBUG "\tread_in_a_row  %lu\n", ui->read_in_a_row);
+	printk(KERN_DEBUG "\tdata_len       %d\n", ui->data_len);
 }
 
 void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -647,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c)
 	}
 }
 
+void dbg_dump_lpt_info(struct ubifs_info *c)
+{
+	int i;
+
+	spin_lock(&dbg_lock);
+	printk(KERN_DEBUG "\tlpt_sz:        %lld\n", c->lpt_sz);
+	printk(KERN_DEBUG "\tpnode_sz:      %d\n", c->pnode_sz);
+	printk(KERN_DEBUG "\tnnode_sz:      %d\n", c->nnode_sz);
+	printk(KERN_DEBUG "\tltab_sz:       %d\n", c->ltab_sz);
+	printk(KERN_DEBUG "\tlsave_sz:      %d\n", c->lsave_sz);
+	printk(KERN_DEBUG "\tbig_lpt:       %d\n", c->big_lpt);
+	printk(KERN_DEBUG "\tlpt_hght:      %d\n", c->lpt_hght);
+	printk(KERN_DEBUG "\tpnode_cnt:     %d\n", c->pnode_cnt);
+	printk(KERN_DEBUG "\tnnode_cnt:     %d\n", c->nnode_cnt);
+	printk(KERN_DEBUG "\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
+	printk(KERN_DEBUG "\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
+	printk(KERN_DEBUG "\tlsave_cnt:     %d\n", c->lsave_cnt);
+	printk(KERN_DEBUG "\tspace_bits:    %d\n", c->space_bits);
+	printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
+	printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
+	printk(KERN_DEBUG "\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
+	printk(KERN_DEBUG "\tpcnt_bits:     %d\n", c->pcnt_bits);
+	printk(KERN_DEBUG "\tlnum_bits:     %d\n", c->lnum_bits);
+	printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
+	printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
+	       c->nhead_lnum, c->nhead_offs);
+	printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
+	if (c->big_lpt)
+		printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
+		       c->lsave_lnum, c->lsave_offs);
+	for (i = 0; i < c->lpt_lebs; i++)
+		printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d "
+		       "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
+		       c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
+	spin_unlock(&dbg_lock);
+}
+
 void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 {
 	struct ubifs_scan_leb *sleb;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 50315fc5718..33d6b95071e 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
 void dbg_dump_budg(struct ubifs_info *c);
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
 void dbg_dump_lprops(struct ubifs_info *c);
+void dbg_dump_lpt_info(struct ubifs_info *c);
 void dbg_dump_leb(const struct ubifs_info *c, int lnum);
 void dbg_dump_znode(const struct ubifs_info *c,
 		    const struct ubifs_znode *znode);
@@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_cats(struct ubifs_info *c);
 int dbg_check_ltab(struct ubifs_info *c);
+int dbg_chk_lpt_free_spc(struct ubifs_info *c);
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
 int dbg_check_synced_i_size(struct inode *inode);
 int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
 int dbg_check_tnc(struct ubifs_info *c, int extra);
@@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_dump_budg(c)                      ({})
 #define dbg_dump_lprop(c, lp)                 ({})
 #define dbg_dump_lprops(c)                    ({})
+#define dbg_dump_lpt_info(c)                  ({})
 #define dbg_dump_leb(c, lnum)                 ({})
 #define dbg_dump_znode(c, znode)              ({})
 #define dbg_dump_heap(c, heap, cat)           ({})
@@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_check_old_index(c, zroot)              0
 #define dbg_check_cats(c)                          0
 #define dbg_check_ltab(c)                          0
+#define dbg_chk_lpt_free_spc(c)                    0
+#define dbg_chk_lpt_sz(c, action, len)             0
 #define dbg_check_synced_i_size(inode)             0
 #define dbg_check_dir_size(c, dir)                 0
 #define dbg_check_tnc(c, x)                        0
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3d698e2022b..51cf511d44d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page)
 				err = ret;
 				if (err != -ENOENT)
 					break;
+			} else if (block + 1 == beyond) {
+				int dlen = le32_to_cpu(dn->size);
+				int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
+
+				if (ilen && ilen < dlen)
+					memset(addr + ilen, 0, dlen - ilen);
 			}
 		}
 		if (++i >= UBIFS_BLOCKS_PER_PAGE)
@@ -577,8 +583,262 @@ out:
 	return copied;
 }
 
+/**
+ * populate_page - copy data nodes into a page for bulk-read.
+ * @c: UBIFS file-system description object
+ * @page: page
+ * @bu: bulk-read information
+ * @n: next zbranch slot
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int populate_page(struct ubifs_info *c, struct page *page,
+			 struct bu_info *bu, int *n)
+{
+	int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
+	struct inode *inode = page->mapping->host;
+	loff_t i_size = i_size_read(inode);
+	unsigned int page_block;
+	void *addr, *zaddr;
+	pgoff_t end_index;
+
+	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+		inode->i_ino, page->index, i_size, page->flags);
+
+	addr = zaddr = kmap(page);
+
+	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	if (!i_size || page->index > end_index) {
+		hole = 1;
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		goto out_hole;
+	}
+
+	page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	while (1) {
+		int err, len, out_len, dlen;
+
+		if (nn >= bu->cnt) {
+			hole = 1;
+			memset(addr, 0, UBIFS_BLOCK_SIZE);
+		} else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
+			struct ubifs_data_node *dn;
+
+			dn = bu->buf + (bu->zbranch[nn].offs - offs);
+
+			ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+				     ubifs_inode(inode)->creat_sqnum);
+
+			len = le32_to_cpu(dn->size);
+			if (len <= 0 || len > UBIFS_BLOCK_SIZE)
+				goto out_err;
+
+			dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+			out_len = UBIFS_BLOCK_SIZE;
+			err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+					       le16_to_cpu(dn->compr_type));
+			if (err || len != out_len)
+				goto out_err;
+
+			if (len < UBIFS_BLOCK_SIZE)
+				memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
+
+			nn += 1;
+			read = (i << UBIFS_BLOCK_SHIFT) + len;
+		} else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
+			nn += 1;
+			continue;
+		} else {
+			hole = 1;
+			memset(addr, 0, UBIFS_BLOCK_SIZE);
+		}
+		if (++i >= UBIFS_BLOCKS_PER_PAGE)
+			break;
+		addr += UBIFS_BLOCK_SIZE;
+		page_block += 1;
+	}
+
+	if (end_index == page->index) {
+		int len = i_size & (PAGE_CACHE_SIZE - 1);
+
+		if (len && len < read)
+			memset(zaddr + len, 0, read - len);
+	}
+
+out_hole:
+	if (hole) {
+		SetPageChecked(page);
+		dbg_gen("hole");
+	}
+
+	SetPageUptodate(page);
+	ClearPageError(page);
+	flush_dcache_page(page);
+	kunmap(page);
+	*n = nn;
+	return 0;
+
+out_err:
+	ClearPageUptodate(page);
+	SetPageError(page);
+	flush_dcache_page(page);
+	kunmap(page);
+	ubifs_err("bad data node (block %u, inode %lu)",
+		  page_block, inode->i_ino);
+	return -EINVAL;
+}
+
+/**
+ * ubifs_do_bulk_read - do bulk-read.
+ * @c: UBIFS file-system description object
+ * @page1: first page
+ *
+ * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
+ */
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
+{
+	pgoff_t offset = page1->index, end_index;
+	struct address_space *mapping = page1->mapping;
+	struct inode *inode = mapping->host;
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	struct bu_info *bu;
+	int err, page_idx, page_cnt, ret = 0, n = 0;
+	loff_t isize;
+
+	bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
+	if (!bu)
+		return 0;
+
+	bu->buf_len = c->bulk_read_buf_size;
+	bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
+	if (!bu->buf)
+		goto out_free;
+
+	data_key_init(c, &bu->key, inode->i_ino,
+		      offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+
+	err = ubifs_tnc_get_bu_keys(c, bu);
+	if (err)
+		goto out_warn;
+
+	if (bu->eof) {
+		/* Turn off bulk-read at the end of the file */
+		ui->read_in_a_row = 1;
+		ui->bulk_read = 0;
+	}
+
+	page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	if (!page_cnt) {
+		/*
+		 * This happens when there are multiple blocks per page and the
+		 * blocks for the first page we are looking for, are not
+		 * together. If all the pages were like this, bulk-read would
+		 * reduce performance, so we turn it off for a while.
+		 */
+		ui->read_in_a_row = 0;
+		ui->bulk_read = 0;
+		goto out_free;
+	}
+
+	if (bu->cnt) {
+		err = ubifs_tnc_bulk_read(c, bu);
+		if (err)
+			goto out_warn;
+	}
+
+	err = populate_page(c, page1, bu, &n);
+	if (err)
+		goto out_warn;
+
+	unlock_page(page1);
+	ret = 1;
+
+	isize = i_size_read(inode);
+	if (isize == 0)
+		goto out_free;
+	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+
+	for (page_idx = 1; page_idx < page_cnt; page_idx++) {
+		pgoff_t page_offset = offset + page_idx;
+		struct page *page;
+
+		if (page_offset > end_index)
+			break;
+		page = find_or_create_page(mapping, page_offset,
+					   GFP_NOFS | __GFP_COLD);
+		if (!page)
+			break;
+		if (!PageUptodate(page))
+			err = populate_page(c, page, bu, &n);
+		unlock_page(page);
+		page_cache_release(page);
+		if (err)
+			break;
+	}
+
+	ui->last_page_read = offset + page_idx - 1;
+
+out_free:
+	kfree(bu->buf);
+	kfree(bu);
+	return ret;
+
+out_warn:
+	ubifs_warn("ignoring error %d and skipping bulk-read", err);
+	goto out_free;
+}
+
+/**
+ * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
+ * @page: page from which to start bulk-read.
+ *
+ * Some flash media are capable of reading sequentially at faster rates. UBIFS
+ * bulk-read facility is designed to take advantage of that, by reading in one
+ * go consecutive data nodes that are also located consecutively in the same
+ * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
+ */
+static int ubifs_bulk_read(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct ubifs_info *c = inode->i_sb->s_fs_info;
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	pgoff_t index = page->index, last_page_read = ui->last_page_read;
+	int ret = 0;
+
+	ui->last_page_read = index;
+
+	if (!c->bulk_read)
+		return 0;
+	/*
+	 * Bulk-read is protected by ui_mutex, but it is an optimization, so
+	 * don't bother if we cannot lock the mutex.
+	 */
+	if (!mutex_trylock(&ui->ui_mutex))
+		return 0;
+	if (index != last_page_read + 1) {
+		/* Turn off bulk-read if we stop reading sequentially */
+		ui->read_in_a_row = 1;
+		if (ui->bulk_read)
+			ui->bulk_read = 0;
+		goto out_unlock;
+	}
+	if (!ui->bulk_read) {
+		ui->read_in_a_row += 1;
+		if (ui->read_in_a_row < 3)
+			goto out_unlock;
+		/* Three reads in a row, so switch on bulk-read */
+		ui->bulk_read = 1;
+	}
+	ret = ubifs_do_bulk_read(c, page);
+out_unlock:
+	mutex_unlock(&ui->ui_mutex);
+	return ret;
+}
+
 static int ubifs_readpage(struct file *file, struct page *page)
 {
+	if (ubifs_bulk_read(page))
+		return 0;
 	do_readpage(page);
 	unlock_page(page);
 	return 0;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 47814cde240..717d79c97c5 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c)
 	 * it is needed now for this commit.
 	 */
 	lp = ubifs_lpt_lookup_dirty(c, lnum);
-	if (unlikely(IS_ERR(lp)))
+	if (IS_ERR(lp))
 		return PTR_ERR(lp);
 	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
 			     lp->flags | LPROPS_INDEX, -1);
-	if (unlikely(IS_ERR(lp)))
+	if (IS_ERR(lp))
 		return PTR_ERR(lp);
 	dbg_find("LEB %d, dirty %d and free %d flags %#x",
 		 lp->lnum, lp->dirty, lp->free, lp->flags);
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 02aba36fe3d..0bef6501d58 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -96,6 +96,48 @@ static int switch_gc_head(struct ubifs_info *c)
 }
 
 /**
+ * joinup - bring data nodes for an inode together.
+ * @c: UBIFS file-system description object
+ * @sleb: describes scanned LEB
+ * @inum: inode number
+ * @blk: block number
+ * @data: list to which to add data nodes
+ *
+ * This function looks at the first few nodes in the scanned LEB @sleb and adds
+ * them to @data if they are data nodes from @inum and have a larger block
+ * number than @blk. This function returns %0 on success and a negative error
+ * code on failure.
+ */
+static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
+		  unsigned int blk, struct list_head *data)
+{
+	int err, cnt = 6, lnum = sleb->lnum, offs;
+	struct ubifs_scan_node *snod, *tmp;
+	union ubifs_key *key;
+
+	list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+		key = &snod->key;
+		if (key_inum(c, key) == inum &&
+		    key_type(c, key) == UBIFS_DATA_KEY &&
+		    key_block(c, key) > blk) {
+			offs = snod->offs;
+			err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
+			if (err < 0)
+				return err;
+			list_del(&snod->list);
+			if (err) {
+				list_add_tail(&snod->list, data);
+				blk = key_block(c, key);
+			} else
+				kfree(snod);
+			cnt = 6;
+		} else if (--cnt == 0)
+			break;
+	}
+	return 0;
+}
+
+/**
  * move_nodes - move nodes.
  * @c: UBIFS file-system description object
  * @sleb: describes nodes to move
@@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c)
 static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 {
 	struct ubifs_scan_node *snod, *tmp;
-	struct list_head large, medium, small;
+	struct list_head data, large, medium, small;
 	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
 	int avail, err, min = INT_MAX;
+	unsigned int blk = 0;
+	ino_t inum = 0;
 
+	INIT_LIST_HEAD(&data);
 	INIT_LIST_HEAD(&large);
 	INIT_LIST_HEAD(&medium);
 	INIT_LIST_HEAD(&small);
 
-	list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
-		struct list_head *lst;
+	while (!list_empty(&sleb->nodes)) {
+		struct list_head *lst = sleb->nodes.next;
+
+		snod = list_entry(lst, struct ubifs_scan_node, list);
 
 		ubifs_assert(snod->type != UBIFS_IDX_NODE);
 		ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 		if (err < 0)
 			goto out;
 
-		lst = &snod->list;
 		list_del(lst);
 		if (!err) {
 			/* The node is obsolete, remove it from the list */
@@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 		}
 
 		/*
-		 * Sort the list of nodes so that large nodes go first, and
-		 * small nodes go last.
+		 * Sort the list of nodes so that data nodes go first, large
+		 * nodes go second, and small nodes go last.
 		 */
-		if (snod->len > MEDIUM_NODE_WM)
-			list_add(lst, &large);
+		if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
+			if (inum != key_inum(c, &snod->key)) {
+				if (inum) {
+					/*
+					 * Try to move data nodes from the same
+					 * inode together.
+					 */
+					err = joinup(c, sleb, inum, blk, &data);
+					if (err)
+						goto out;
+				}
+				inum = key_inum(c, &snod->key);
+				blk = key_block(c, &snod->key);
+			}
+			list_add_tail(lst, &data);
+		} else if (snod->len > MEDIUM_NODE_WM)
+			list_add_tail(lst, &large);
 		else if (snod->len > SMALL_NODE_WM)
-			list_add(lst, &medium);
+			list_add_tail(lst, &medium);
 		else
-			list_add(lst, &small);
+			list_add_tail(lst, &small);
 
 		/* And find the smallest node */
 		if (snod->len < min)
@@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 	 * Join the tree lists so that we'd have one roughly sorted list
 	 * ('large' will be the head of the joined list).
 	 */
+	list_splice(&data, &large);
 	list_splice(&medium, large.prev);
 	list_splice(&small, large.prev);
 
@@ -653,7 +715,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 	 */
 	while (1) {
 		lp = ubifs_fast_find_freeable(c);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -665,7 +727,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 		if (err)
 			goto out;
 		lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -680,7 +742,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 	/* Record index freeable LEBs for unmapping after commit */
 	while (1) {
 		lp = ubifs_fast_find_frdi_idx(c);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			goto out;
 		}
@@ -696,7 +758,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
 		/* Don't release the LEB until after the next commit */
 		flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
 		lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
-		if (unlikely(IS_ERR(lp))) {
+		if (IS_ERR(lp)) {
 			err = PTR_ERR(lp);
 			kfree(idx_gc);
 			goto out;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 054363f2b20..01682713af6 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
 {
 	if (!c->ro_media) {
 		c->ro_media = 1;
+		c->no_chk_data_crc = 0;
 		ubifs_warn("switched to read-only mode, error %d", err);
 		dbg_dump_stack();
 	}
@@ -74,6 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  * @quiet: print no messages
+ * @chk_crc: indicates whether to always check the CRC
  *
  * This function checks node magic number and CRC checksum. This function also
  * validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -85,7 +87,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
  * or magic.
  */
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet)
+		     int offs, int quiet, int chk_crc)
 {
 	int err = -EINVAL, type, node_len;
 	uint32_t crc, node_crc, magic;
@@ -121,6 +123,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 		   node_len > c->ranges[type].max_len)
 		goto out_len;
 
+	if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc)
+		if (c->no_chk_data_crc)
+			return 0;
+
 	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
 	node_crc = le32_to_cpu(ch->crc);
 	if (crc != node_crc) {
@@ -722,7 +728,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0);
+	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_err("expected node type %d", type);
 		return err;
@@ -781,7 +787,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0);
+	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_err("expected node type %d", type);
 		return err;
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 8f747600754..9ee65086f62 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c,
  * @key2: the second key to compare
  *
  * This function compares 2 keys and returns %-1 if @key1 is less than
- * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2.
+ * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2.
  */
 static inline int keys_cmp(const struct ubifs_info *c,
 			   const union ubifs_key *key1,
@@ -503,6 +503,26 @@ static inline int keys_cmp(const struct ubifs_info *c,
 }
 
 /**
+ * keys_eq - determine if keys are equivalent.
+ * @c: UBIFS file-system description object
+ * @key1: the first key to compare
+ * @key2: the second key to compare
+ *
+ * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and
+ * %0 if not.
+ */
+static inline int keys_eq(const struct ubifs_info *c,
+			  const union ubifs_key *key1,
+			  const union ubifs_key *key2)
+{
+	if (key1->u32[0] != key2->u32[0])
+		return 0;
+	if (key1->u32[1] != key2->u32[1])
+		return 0;
+	return 1;
+}
+
+/**
  * is_hash_key - is a key vulnerable to hash collisions.
  * @c: UBIFS file-system description object
  * @key: key
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 2ba93da71b6..f27176e9b70 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
 			}
 		}
 	}
+
 	/* Not greater than parent, so compare to children */
 	while (1) {
 		/* Compare to left child */
@@ -460,18 +461,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
 }
 
 /**
- * ubifs_get_lprops - get reference to LEB properties.
- * @c: the UBIFS file-system description object
- *
- * This function locks lprops. Lprops have to be unlocked by
- * 'ubifs_release_lprops()'.
- */
-void ubifs_get_lprops(struct ubifs_info *c)
-{
-	mutex_lock(&c->lp_mutex);
-}
-
-/**
  * calc_dark - calculate LEB dark space size.
  * @c: the UBIFS file-system description object
  * @spc: amount of free and dirty space in the LEB
@@ -576,7 +565,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 	ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
 
 	spin_lock(&c->space_lock);
-
 	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
 		c->lst.taken_empty_lebs -= 1;
 
@@ -637,31 +625,12 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 		c->lst.taken_empty_lebs += 1;
 
 	change_category(c, lprops);
-
 	c->idx_gc_cnt += idx_gc_cnt;
-
 	spin_unlock(&c->space_lock);
-
 	return lprops;
 }
 
 /**
- * ubifs_release_lprops - release lprops lock.
- * @c: the UBIFS file-system description object
- *
- * This function has to be called after each 'ubifs_get_lprops()' call to
- * unlock lprops.
- */
-void ubifs_release_lprops(struct ubifs_info *c)
-{
-	ubifs_assert(mutex_is_locked(&c->lp_mutex));
-	ubifs_assert(c->lst.empty_lebs >= 0 &&
-		     c->lst.empty_lebs <= c->main_lebs);
-
-	mutex_unlock(&c->lp_mutex);
-}
-
-/**
  * ubifs_get_lp_stats - get lprops statistics.
  * @c: UBIFS file-system description object
  * @st: return statistics
@@ -1262,7 +1231,6 @@ static int scan_check_cb(struct ubifs_info *c,
 	}
 
 	ubifs_scan_destroy(sleb);
-
 	return LPT_SCAN_CONTINUE;
 
 out_print:
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 9ff2463177e..db8bd0e518b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
 	c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
 	c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
 	c->lpt_sz += c->ltab_sz;
-	c->lpt_sz += c->lsave_sz;
+	if (c->big_lpt)
+		c->lpt_sz += c->lsave_sz;
 
 	/* Add wastage */
 	sz = c->lpt_sz;
@@ -287,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
 	const int k = 32 - nrbits;
 	uint8_t *p = *addr;
 	int b = *pos;
-	uint32_t val;
+	uint32_t uninitialized_var(val);
+	const int bytes = (nrbits + b + 7) >> 3;
 
 	ubifs_assert(nrbits > 0);
 	ubifs_assert(nrbits <= 32);
 	ubifs_assert(*pos >= 0);
 	ubifs_assert(*pos < 8);
 	if (b) {
-		val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) |
-		      ((uint32_t)p[4] << 24);
+		switch (bytes) {
+		case 2:
+			val = p[1];
+			break;
+		case 3:
+			val = p[1] | ((uint32_t)p[2] << 8);
+			break;
+		case 4:
+			val = p[1] | ((uint32_t)p[2] << 8) |
+				     ((uint32_t)p[3] << 16);
+			break;
+		case 5:
+			val = p[1] | ((uint32_t)p[2] << 8) |
+				     ((uint32_t)p[3] << 16) |
+				     ((uint32_t)p[4] << 24);
+		}
 		val <<= (8 - b);
 		val |= *p >> b;
 		nrbits += b;
-	} else
-		val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
-		      ((uint32_t)p[3] << 24);
+	} else {
+		switch (bytes) {
+		case 1:
+			val = p[0];
+			break;
+		case 2:
+			val = p[0] | ((uint32_t)p[1] << 8);
+			break;
+		case 3:
+			val = p[0] | ((uint32_t)p[1] << 8) |
+				     ((uint32_t)p[2] << 16);
+			break;
+		case 4:
+			val = p[0] | ((uint32_t)p[1] << 8) |
+				     ((uint32_t)p[2] << 16) |
+				     ((uint32_t)p[3] << 24);
+			break;
+		}
+	}
 	val <<= k;
 	val >>= k;
 	b = nrbits & 7;
-	p += nrbits / 8;
+	p += nrbits >> 3;
 	*addr = p;
 	*pos = b;
 	ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5f0b83e20af..eed5a0025d6 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
 			return 0;
 		}
 	}
-	dbg_err("last LEB %d", *lnum);
-	dump_stack();
 	return -ENOSPC;
 }
 
@@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c)
 	int lnum, offs, len, alen, done_lsave, done_ltab, err;
 	struct ubifs_cnode *cnode;
 
+	err = dbg_chk_lpt_sz(c, 0, 0);
+	if (err)
+		return err;
 	cnode = c->lpt_cnext;
 	if (!cnode)
 		return 0;
@@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->lsave_lnum = lnum;
 		c->lsave_offs = offs;
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	if (offs + c->ltab_sz <= c->leb_size) {
@@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->ltab_lnum = lnum;
 		c->ltab_offs = offs;
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	do {
@@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		while (offs + len > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c)
 				c->lsave_lnum = lnum;
 				c->lsave_offs = offs;
 				offs += c->lsave_sz;
+				dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 				continue;
 			}
 			if (!done_ltab) {
@@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c)
 				c->ltab_lnum = lnum;
 				c->ltab_offs = offs;
 				offs += c->ltab_sz;
+				dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 				continue;
 			}
 			break;
@@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c)
 			c->lpt_offs = offs;
 		}
 		offs += len;
+		dbg_chk_lpt_sz(c, 1, len);
 		cnode = cnode->cnext;
 	} while (cnode && cnode != c->lpt_cnext);
 
@@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		if (offs + c->lsave_sz > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->lsave_lnum = lnum;
 		c->lsave_offs = offs;
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	/* Make sure to place LPT's own lprops table */
@@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c)
 		if (offs + c->ltab_sz > c->leb_size) {
 			alen = ALIGN(offs, c->min_io_size);
 			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+			dbg_chk_lpt_sz(c, 2, alen - offs);
 			err = alloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c)
 		c->ltab_lnum = lnum;
 		c->ltab_offs = offs;
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	alen = ALIGN(offs, c->min_io_size);
 	upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+	dbg_chk_lpt_sz(c, 4, alen - offs);
+	err = dbg_chk_lpt_sz(c, 3, alen);
+	if (err)
+		return err;
 	return 0;
+
+no_space:
+	ubifs_err("LPT out of space");
+	dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
+		"done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	dbg_dump_lpt_info(c);
+	return err;
 }
 
 /**
@@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
 			*lnum = i + c->lpt_first;
 			return 0;
 		}
-	dbg_err("last LEB %d", *lnum);
-	dump_stack();
 	return -ENOSPC;
 }
 
@@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c)
 		done_lsave = 1;
 		ubifs_pack_lsave(c, buf + offs, c->lsave);
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	if (offs + c->ltab_sz <= c->leb_size) {
 		done_ltab = 1;
 		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	/* Loop for each cnode */
@@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c)
 						       alen, UBI_SHORTTERM);
 				if (err)
 					return err;
+				dbg_chk_lpt_sz(c, 4, alen - wlen);
 			}
+			dbg_chk_lpt_sz(c, 2, 0);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			from = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
@@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c)
 				done_lsave = 1;
 				ubifs_pack_lsave(c, buf + offs, c->lsave);
 				offs += c->lsave_sz;
+				dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 				continue;
 			}
 			if (!done_ltab) {
 				done_ltab = 1;
 				ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 				offs += c->ltab_sz;
+				dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 				continue;
 			}
 			break;
@@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c)
 		clear_bit(COW_ZNODE, &cnode->flags);
 		smp_mb__after_clear_bit();
 		offs += len;
+		dbg_chk_lpt_sz(c, 1, len);
 		cnode = cnode->cnext;
 	} while (cnode && cnode != c->lpt_cnext);
 
@@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c)
 					      UBI_SHORTTERM);
 			if (err)
 				return err;
+			dbg_chk_lpt_sz(c, 2, alen - wlen);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c)
 		done_lsave = 1;
 		ubifs_pack_lsave(c, buf + offs, c->lsave);
 		offs += c->lsave_sz;
+		dbg_chk_lpt_sz(c, 1, c->lsave_sz);
 	}
 
 	/* Make sure to place LPT's own lprops table */
@@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c)
 					      UBI_SHORTTERM);
 			if (err)
 				return err;
+			dbg_chk_lpt_sz(c, 2, alen - wlen);
 			err = realloc_lpt_leb(c, &lnum);
 			if (err)
-				return err;
+				goto no_space;
 			offs = 0;
 			ubifs_assert(lnum >= c->lpt_first &&
 				     lnum <= c->lpt_last);
@@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c)
 		done_ltab = 1;
 		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
 		offs += c->ltab_sz;
+		dbg_chk_lpt_sz(c, 1, c->ltab_sz);
 	}
 
 	/* Write remaining data in buffer */
@@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c)
 	err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
 	if (err)
 		return err;
+
+	dbg_chk_lpt_sz(c, 4, alen - wlen);
+	err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size));
+	if (err)
+		return err;
+
 	c->nhead_lnum = lnum;
 	c->nhead_offs = ALIGN(offs, c->min_io_size);
 
@@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c)
 	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
 	if (c->big_lpt)
 		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+
 	return 0;
+
+no_space:
+	ubifs_err("LPT out of space mismatch");
+	dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
+	        "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	dbg_dump_lpt_info(c);
+	return err;
 }
 
 /**
@@ -1044,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
 	int pos = 0, node_type, node_len;
 	uint16_t crc, calc_crc;
 
+	if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
+		return 0;
 	node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
 	if (node_type == UBIFS_LPT_NOT_A_NODE)
 		return 0;
@@ -1156,6 +1203,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
 	dbg_lp("");
 
 	mutex_lock(&c->lp_mutex);
+	err = dbg_chk_lpt_free_spc(c);
+	if (err)
+		goto out;
 	err = dbg_check_ltab(c);
 	if (err)
 		goto out;
@@ -1645,4 +1695,121 @@ int dbg_check_ltab(struct ubifs_info *c)
 	return 0;
 }
 
+/**
+ * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT.
+ * @c: the UBIFS file-system description object
+ *
+ * Returns %0 if the free space counted is at least @c->lpt_sz, else %-EINVAL.
+ */
+int dbg_chk_lpt_free_spc(struct ubifs_info *c)
+{
+	long long free = 0;
+	int i;
+
+	for (i = 0; i < c->lpt_lebs; i++) {
+		if (c->ltab[i].tgc || c->ltab[i].cmt)
+			continue; /* LEBs flagged tgc or cmt are not counted */
+		if (i + c->lpt_first == c->nhead_lnum)
+			free += c->leb_size - c->nhead_offs; /* rest of nhead LEB */
+		else if (c->ltab[i].free == c->leb_size)
+			free += c->leb_size; /* only wholly free LEBs count */
+	}
+	if (free < c->lpt_sz) {
+		dbg_err("LPT space error: free %lld lpt_sz %lld",
+			free, c->lpt_sz);
+		dbg_dump_lpt_info(c);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
+ * @c: the UBIFS file-system description object
+ * @action: 0 reset, 1 add data, 2 add waste and count LEB, 3 verify, 4 add waste
+ * @len: length to account; for action 3 the aligned end offset
+ *
+ * Returns %0 on success, %-EINVAL if a check fails or @action is unknown.
+ */
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
+{
+	long long chk_lpt_sz, lpt_sz;
+	int err = 0;
+
+	switch (action) {
+	case 0: /* reset all counters, check dirty node counts */
+		c->chk_lpt_sz = 0;
+		c->chk_lpt_sz2 = 0;
+		c->chk_lpt_lebs = 0;
+		c->chk_lpt_wastage = 0;
+		if (c->dirty_pn_cnt > c->pnode_cnt) {
+			dbg_err("dirty pnodes %d exceed max %d",
+				c->dirty_pn_cnt, c->pnode_cnt);
+			err = -EINVAL;
+		}
+		if (c->dirty_nn_cnt > c->nnode_cnt) {
+			dbg_err("dirty nnodes %d exceed max %d",
+				c->dirty_nn_cnt, c->nnode_cnt);
+			err = -EINVAL;
+		}
+		return err;
+	case 1: /* account @len bytes */
+		c->chk_lpt_sz += len;
+		return 0;
+	case 2: /* account @len as wastage and count one more LEB */
+		c->chk_lpt_sz += len;
+		c->chk_lpt_wastage += len;
+		c->chk_lpt_lebs += 1;
+		return 0;
+	case 3: /* verify the totals, then save them and reset */
+		chk_lpt_sz = c->leb_size;
+		chk_lpt_sz *= c->chk_lpt_lebs;
+		chk_lpt_sz += len - c->nhead_offs; /* lebs * leb_size + len - nhead_offs */
+		if (c->chk_lpt_sz != chk_lpt_sz) {
+			dbg_err("LPT wrote %lld but space used was %lld",
+				c->chk_lpt_sz, chk_lpt_sz);
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz > c->lpt_sz) {
+			dbg_err("LPT wrote %lld but lpt_sz is %lld",
+				c->chk_lpt_sz, c->lpt_sz);
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) {
+			dbg_err("LPT layout size %lld but wrote %lld",
+				c->chk_lpt_sz, c->chk_lpt_sz2); /* NOTE(review): args may be swapped */
+			err = -EINVAL;
+		}
+		if (c->chk_lpt_sz2 && c->new_nhead_offs != len) {
+			dbg_err("LPT new nhead offs: expected %d was %d",
+				c->new_nhead_offs, len);
+			err = -EINVAL;
+		}
+		lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
+		lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
+		lpt_sz += c->ltab_sz;
+		if (c->big_lpt)
+			lpt_sz += c->lsave_sz;
+		if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) {
+			dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
+				c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz);
+			err = -EINVAL;
+		}
+		if (err)
+			dbg_dump_lpt_info(c);
+		c->chk_lpt_sz2 = c->chk_lpt_sz; /* keep total for the next action 3 */
+		c->chk_lpt_sz = 0;
+		c->chk_lpt_wastage = 0;
+		c->chk_lpt_lebs = 0;
+		c->new_nhead_offs = len;
+		return err;
+	case 4: /* account @len as wastage, no new LEB counted */
+		c->chk_lpt_sz += len;
+		c->chk_lpt_wastage += len;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4c12a9215d7..4fa81d867e4 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c,
 	return ubifs_tnc_locate(c, key, node, NULL, NULL);
 }
 
+/**
+ * ubifs_get_lprops - get reference to LEB properties.
+ * @c: the UBIFS file-system description object
+ *
+ * This function takes @c->lp_mutex. It has to be released again by calling
+ * 'ubifs_release_lprops()'.
+ */
+static inline void ubifs_get_lprops(struct ubifs_info *c)
+{
+	mutex_lock(&c->lp_mutex);
+}
+
+/**
+ * ubifs_release_lprops - release lprops lock.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called after each 'ubifs_get_lprops()' call to
+ * unlock lprops, i.e. to release @c->lp_mutex.
+ */
+static inline void ubifs_release_lprops(struct ubifs_info *c)
+{
+	ubifs_assert(mutex_is_locked(&c->lp_mutex)); /* must be locked on entry */
+	ubifs_assert(c->lst.empty_lebs >= 0 &&
+		     c->lst.empty_lebs <= c->main_lebs); /* checked before unlocking */
+	mutex_unlock(&c->lp_mutex);
+}
+
 #endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index acf5c5fffc6..0ed82479b44 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 
 	dbg_scan("scanning %s", dbg_ntype(ch->node_type));
 
-	if (ubifs_check_node(c, buf, lnum, offs, quiet))
+	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
 		return SCANNED_A_CORRUPT_NODE;
 
 	if (ch->node_type == UBIFS_PAD_NODE) {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a9220333b3..8780efbf40a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -401,6 +401,16 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
 	else if (c->mount_opts.unmount_mode == 1)
 		seq_printf(s, ",norm_unmount");
 
+	if (c->mount_opts.bulk_read == 2)
+		seq_printf(s, ",bulk_read");
+	else if (c->mount_opts.bulk_read == 1)
+		seq_printf(s, ",no_bulk_read");
+
+	if (c->mount_opts.chk_data_crc == 2)
+		seq_printf(s, ",chk_data_crc");
+	else if (c->mount_opts.chk_data_crc == 1)
+		seq_printf(s, ",no_chk_data_crc");
+
 	return 0;
 }
 
@@ -408,13 +418,26 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 {
 	struct ubifs_info *c = sb->s_fs_info;
 	int i, ret = 0, err;
+	long long bud_bytes;
 
-	if (c->jheads)
+	if (c->jheads) {
 		for (i = 0; i < c->jhead_cnt; i++) {
 			err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
 			if (err && !ret)
 				ret = err;
 		}
+
+		/* Commit the journal unless it has too little data */
+		spin_lock(&c->buds_lock);
+		bud_bytes = c->bud_bytes;
+		spin_unlock(&c->buds_lock);
+		if (bud_bytes > c->leb_size) {
+			err = ubifs_run_commit(c);
+			if (err)
+				return err;
+		}
+	}
+
 	/*
 	 * We ought to call sync for c->ubi but it does not have one. If it had
 	 * it would in turn call mtd->sync, however mtd operations are
@@ -538,6 +561,18 @@ static int init_constants_early(struct ubifs_info *c)
 	 * calculations when reporting free space.
 	 */
 	c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
+	/* Buffer size for bulk-reads */
+	c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
+	if (c->bulk_read_buf_size > c->leb_size)
+		c->bulk_read_buf_size = c->leb_size;
+	if (c->bulk_read_buf_size > 128 * 1024) {
+		/* Check if we can kmalloc more than 128KiB */
+		void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
+
+		kfree(try);
+		if (!try)
+			c->bulk_read_buf_size = 128 * 1024;
+	}
 	return 0;
 }
 
@@ -840,17 +875,29 @@ static int check_volume_empty(struct ubifs_info *c)
  *
  * Opt_fast_unmount: do not run a journal commit before un-mounting
  * Opt_norm_unmount: run a journal commit before un-mounting
+ * Opt_bulk_read: enable bulk-reads
+ * Opt_no_bulk_read: disable bulk-reads
+ * Opt_chk_data_crc: check CRCs when reading data nodes
+ * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
  * Opt_err: just end of array marker
  */
 enum {
 	Opt_fast_unmount,
 	Opt_norm_unmount,
+	Opt_bulk_read,
+	Opt_no_bulk_read,
+	Opt_chk_data_crc,
+	Opt_no_chk_data_crc,
 	Opt_err,
 };
 
 static const match_table_t tokens = {
 	{Opt_fast_unmount, "fast_unmount"},
 	{Opt_norm_unmount, "norm_unmount"},
+	{Opt_bulk_read, "bulk_read"},
+	{Opt_no_bulk_read, "no_bulk_read"},
+	{Opt_chk_data_crc, "chk_data_crc"},
+	{Opt_no_chk_data_crc, "no_chk_data_crc"},
 	{Opt_err, NULL},
 };
 
@@ -888,6 +935,22 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			c->mount_opts.unmount_mode = 1;
 			c->fast_unmount = 0;
 			break;
+		case Opt_bulk_read:
+			c->mount_opts.bulk_read = 2;
+			c->bulk_read = 1;
+			break;
+		case Opt_no_bulk_read:
+			c->mount_opts.bulk_read = 1;
+			c->bulk_read = 0;
+			break;
+		case Opt_chk_data_crc:
+			c->mount_opts.chk_data_crc = 2;
+			c->no_chk_data_crc = 0;
+			break;
+		case Opt_no_chk_data_crc:
+			c->mount_opts.chk_data_crc = 1;
+			c->no_chk_data_crc = 1;
+			break;
 		default:
 			ubifs_err("unrecognized mount option \"%s\" "
 				  "or missing value", p);
@@ -996,6 +1059,8 @@ static int mount_ubifs(struct ubifs_info *c)
 			goto out_free;
 	}
 
+	c->always_chk_crc = 1;
+
 	err = ubifs_read_superblock(c);
 	if (err)
 		goto out_free;
@@ -1032,8 +1097,6 @@ static int mount_ubifs(struct ubifs_info *c)
 
 		/* Create background thread */
 		c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
-		if (!c->bgt)
-			c->bgt = ERR_PTR(-EINVAL);
 		if (IS_ERR(c->bgt)) {
 			err = PTR_ERR(c->bgt);
 			c->bgt = NULL;
@@ -1139,24 +1202,28 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (err)
 		goto out_infos;
 
+	c->always_chk_crc = 0;
+
 	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
 		  c->vi.ubi_num, c->vi.vol_id, c->vi.name);
 	if (mounted_read_only)
 		ubifs_msg("mounted read-only");
 	x = (long long)c->main_lebs * c->leb_size;
-	ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->main_lebs);
+	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d "
+		  "LEBs)", x, x >> 10, x >> 20, c->main_lebs);
 	x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
-	ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
-	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
-	ubifs_msg("media format %d, latest format %d",
+	ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d "
+		  "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
+	ubifs_msg("media format:       %d (latest is %d)",
 		  c->fmt_version, UBIFS_FORMAT_VERSION);
+	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
+	ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",
+		c->report_rp_size, c->report_rp_size >> 10);
 
 	dbg_msg("compiled on:         " __DATE__ " at " __TIME__);
 	dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);
 	dbg_msg("LEB size:            %d bytes (%d KiB)",
-		c->leb_size, c->leb_size / 1024);
+		c->leb_size, c->leb_size >> 10);
 	dbg_msg("data journal heads:  %d",
 		c->jhead_cnt - NONDATA_JHEADS_CNT);
 	dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X"
@@ -1282,6 +1349,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 
 	mutex_lock(&c->umount_mutex);
 	c->remounting_rw = 1;
+	c->always_chk_crc = 1;
 
 	/* Check for enough free space */
 	if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
@@ -1345,20 +1413,20 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 
 	/* Create background thread */
 	c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
-	if (!c->bgt)
-		c->bgt = ERR_PTR(-EINVAL);
 	if (IS_ERR(c->bgt)) {
 		err = PTR_ERR(c->bgt);
 		c->bgt = NULL;
 		ubifs_err("cannot spawn \"%s\", error %d",
 			  c->bgt_name, err);
-		return err;
+		goto out;
 	}
 	wake_up_process(c->bgt);
 
 	c->orph_buf = vmalloc(c->leb_size);
-	if (!c->orph_buf)
-		return -ENOMEM;
+	if (!c->orph_buf) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	/* Check for enough log space */
 	lnum = c->lhead_lnum + 1;
@@ -1385,6 +1453,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	dbg_gen("re-mounted read-write");
 	c->vfs_sb->s_flags &= ~MS_RDONLY;
 	c->remounting_rw = 0;
+	c->always_chk_crc = 0;
 	mutex_unlock(&c->umount_mutex);
 	return 0;
 
@@ -1400,6 +1469,7 @@ out:
 	c->ileb_buf = NULL;
 	ubifs_lpt_free(c, 1);
 	c->remounting_rw = 0;
+	c->always_chk_crc = 0;
 	mutex_unlock(&c->umount_mutex);
 	return err;
 }
@@ -1408,12 +1478,9 @@ out:
  * commit_on_unmount - commit the journal when un-mounting.
  * @c: UBIFS file-system description object
  *
- * This function is called during un-mounting and it commits the journal unless
- * the "fast unmount" mode is enabled. It also avoids committing the journal if
- * it contains too few data.
- *
- * Sometimes recovery requires the journal to be committed at least once, and
- * this function takes care about this.
+ * This function is called during un-mounting and re-mounting, and it commits
+ * the journal unless the "fast unmount" mode is enabled. It also avoids
+ * committing the journal if it contains too little data.
  */
 static void commit_on_unmount(struct ubifs_info *c)
 {
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 7634c597088..d27fd918b9c 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
 	}
 
 	zn = copy_znode(c, znode);
-	if (unlikely(IS_ERR(zn)))
+	if (IS_ERR(zn))
 		return zn;
 
 	if (zbr->len) {
@@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
 	if (node_len != len)
 		return 0;
 
+	if (type == UBIFS_DATA_NODE && !c->always_chk_crc)
+		if (c->no_chk_data_crc)
+			return 0;
+
 	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
 	node_crc = le32_to_cpu(ch->crc);
 	if (crc != node_crc)
@@ -1128,7 +1132,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
 			ubifs_assert(znode == c->zroot.znode);
 			znode = dirty_cow_znode(c, &c->zroot);
 		}
-		if (unlikely(IS_ERR(znode)) || !p)
+		if (IS_ERR(znode) || !p)
 			break;
 		ubifs_assert(path[p - 1] >= 0);
 		ubifs_assert(path[p - 1] < znode->child_cnt);
@@ -1492,6 +1496,289 @@ out:
 }
 
 /**
+ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * Lookup consecutive data node keys for the same inode that reside
+ * consecutively in the same LEB. Returns %0 or a negative error code.
+ */
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
+{
+	int n, err = 0, lnum = -1, uninitialized_var(offs);
+	int uninitialized_var(len);
+	unsigned int block = key_block(c, &bu->key);
+	struct ubifs_znode *znode;
+
+	bu->cnt = 0;
+	bu->blk_cnt = 0;
+	bu->eof = 0;
+
+	mutex_lock(&c->tnc_mutex);
+	/* Find first key */
+	err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
+	if (err < 0)
+		goto out;
+	if (err) {
+		/* Key found */
+		len = znode->zbranch[n].len;
+		/* The buffer must be big enough for at least 1 node */
+		if (len > bu->buf_len) {
+			err = -EINVAL;
+			goto out;
+		}
+		/* Add this key */
+		bu->zbranch[bu->cnt++] = znode->zbranch[n];
+		bu->blk_cnt += 1;
+		lnum = znode->zbranch[n].lnum;
+		offs = ALIGN(znode->zbranch[n].offs + len, 8); /* where the next node must start */
+	}
+	while (1) {
+		struct ubifs_zbranch *zbr;
+		union ubifs_key *key;
+		unsigned int next_block;
+
+		/* Find next key */
+		err = tnc_next(c, &znode, &n);
+		if (err)
+			goto out;
+		zbr = &znode->zbranch[n];
+		key = &zbr->key;
+		/* See if there is another data key for this file */
+		if (key_inum(c, key) != key_inum(c, &bu->key) ||
+		    key_type(c, key) != UBIFS_DATA_KEY) {
+			err = -ENOENT;
+			goto out;
+		}
+		if (lnum < 0) {
+			/* First key found */
+			lnum = zbr->lnum;
+			offs = ALIGN(zbr->offs + zbr->len, 8);
+			len = zbr->len;
+			if (len > bu->buf_len) {
+				err = -EINVAL;
+				goto out;
+			}
+		} else {
+			/*
+			 * The data nodes must be in consecutive positions in
+			 * the same LEB.
+			 */
+			if (zbr->lnum != lnum || zbr->offs != offs)
+				goto out; /* err is 0 here: stop without an error */
+			offs += ALIGN(zbr->len, 8);
+			len = ALIGN(len, 8) + zbr->len;
+			/* Must not exceed buffer length */
+			if (len > bu->buf_len)
+				goto out; /* err is 0 here as well */
+		}
+		/* Allow for holes */
+		next_block = key_block(c, key);
+		bu->blk_cnt += (next_block - block - 1); /* blocks skipped between keys */
+		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+		block = next_block;
+		/* Add this key */
+		bu->zbranch[bu->cnt++] = *zbr;
+		bu->blk_cnt += 1;
+		/* See if we have room for more */
+		if (bu->cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+			goto out;
+	}
+out:
+	if (err == -ENOENT) { /* reported through bu->eof, not as an error */
+		bu->eof = 1;
+		err = 0;
+	}
+	bu->gc_seq = c->gc_seq;
+	mutex_unlock(&c->tnc_mutex);
+	if (err)
+		return err;
+	/*
+	 * An enormous hole could cause bulk-read to encompass too many
+	 * page cache pages, so limit the number here.
+	 */
+	if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
+		bu->blk_cnt = UBIFS_MAX_BULK_READ;
+	/*
+	 * Ensure that bulk-read covers a whole number of page cache
+	 * pages.
+	 */
+	if (UBIFS_BLOCKS_PER_PAGE == 1 ||
+	    !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
+		return 0;
+	if (bu->eof) {
+		/* At the end of file we can round up */
+		bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
+		return 0;
+	}
+	/* Exclude data nodes that do not make up a whole page cache page */
+	block = key_block(c, &bu->key) + bu->blk_cnt;
+	block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
+	while (bu->cnt) {
+		if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
+			break;
+		bu->cnt -= 1; /* drop trailing key at or beyond 'block' */
+	}
+	return 0;
+}
+
+/**
+ * read_wbuf - bulk-read from a LEB with a wbuf.
+ * @wbuf: wbuf that may overlap the read
+ * @buf: buffer into which to read
+ * @len: read length
+ * @lnum: LEB number from which to read
+ * @offs: offset from which to read
+ *
+ * This function returns %0 on success or a negative error code on failure.
+ */
+static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
+		     int offs)
+{
+	const struct ubifs_info *c = wbuf->c; /* NOTE(review): deref precedes the wbuf assert */
+	int rlen, overlap;
+
+	dbg_io("LEB %d:%d, length %d", lnum, offs, len);
+	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+	ubifs_assert(!(offs & 7) && offs < c->leb_size);
+	ubifs_assert(offs + len <= c->leb_size);
+
+	spin_lock(&wbuf->lock);
+	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
+	if (!overlap) {
+		/* We may safely unlock the write-buffer and read the data */
+		spin_unlock(&wbuf->lock);
+		return ubi_read(c->ubi, lnum, buf, offs, len);
+	}
+
+	/* Don't read under wbuf */
+	rlen = wbuf->offs - offs; /* bytes that lie before the write-buffer */
+	if (rlen < 0)
+		rlen = 0;
+
+	/* Copy the rest from the write-buffer */
+	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
+	spin_unlock(&wbuf->lock);
+
+	if (rlen > 0)
+		/* Read everything that goes before write-buffer */
+		return ubi_read(c->ubi, lnum, buf, offs, rlen);
+
+	return 0;
+}
+
+/**
+ * validate_data_node - validate data nodes for bulk-read.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing data node to validate
+ * @zbr: zbranch of data node to validate
+ *
+ * This function returns %0 on success or a negative error code on failure.
+ */
+static int validate_data_node(struct ubifs_info *c, void *buf,
+			      struct ubifs_zbranch *zbr)
+{
+	union ubifs_key key1;
+	struct ubifs_ch *ch = buf;
+	int err, len;
+
+	if (ch->node_type != UBIFS_DATA_NODE) {
+		ubifs_err("bad node type (%d but expected %d)",
+			  ch->node_type, UBIFS_DATA_NODE);
+		goto out_err;
+	}
+
+	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
+	if (err) {
+		ubifs_err("expected node type %d", UBIFS_DATA_NODE);
+		goto out; /* keep the error code from ubifs_check_node() */
+	}
+
+	len = le32_to_cpu(ch->len);
+	if (len != zbr->len) {
+		ubifs_err("bad node length %d, expected %d", len, zbr->len);
+		goto out_err;
+	}
+
+	/* Make sure the key of the read node is correct */
+	key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
+	if (!keys_eq(c, &zbr->key, &key1)) {
+		ubifs_err("bad key in node at LEB %d:%d",
+			  zbr->lnum, zbr->offs);
+		dbg_tnc("looked for key %s found node's key %s",
+			DBGKEY(&zbr->key), DBGKEY1(&key1));
+		goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	err = -EINVAL; /* type, length and key mismatches all report -EINVAL */
+out:
+	ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
+	dbg_dump_node(c, buf);
+	dbg_dump_stack();
+	return err;
+}
+
+/**
+ * ubifs_tnc_bulk_read - read a number of data nodes in one go.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * This function reads and validates the data nodes that were identified by the
+ * 'ubifs_tnc_get_bu_keys()' function. This function returns %0 on success,
+ * -EAGAIN to indicate a race with GC, or another negative error code on
+ * failure.
+ */
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
+{
+	int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
+	struct ubifs_wbuf *wbuf;
+	void *buf;
+
+	len = bu->zbranch[bu->cnt - 1].offs; /* NOTE(review): assumes bu->cnt > 0 - confirm */
+	len += bu->zbranch[bu->cnt - 1].len - offs; /* span from first node to end of last */
+	if (len > bu->buf_len) {
+		ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
+		return -EINVAL;
+	}
+
+	/* Do the read */
+	wbuf = ubifs_get_wbuf(c, lnum);
+	if (wbuf)
+		err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
+	else
+		err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
+
+	/* Check for a race with GC */
+	if (maybe_leb_gced(c, lnum, bu->gc_seq))
+		return -EAGAIN;
+
+	if (err && err != -EBADMSG) { /* -EBADMSG falls through to validation */
+		ubifs_err("failed to read from LEB %d:%d, error %d",
+			  lnum, offs, err);
+		dbg_dump_stack();
+		dbg_tnc("key %s", DBGKEY(&bu->key));
+		return err;
+	}
+
+	/* Validate the nodes read */
+	buf = bu->buf;
+	for (i = 0; i < bu->cnt; i++) {
+		err = validate_data_node(c, buf, &bu->zbranch[i]);
+		if (err)
+			return err;
+		buf = buf + ALIGN(bu->zbranch[i].len, 8); /* nodes sit at 8-byte aligned steps */
+	}
+
+	return 0;
+}
+
+/**
  * do_lookup_nm- look up a "hashed" node.
  * @c: UBIFS file-system description object
  * @key: node key to lookup
@@ -1675,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
 {
 	struct ubifs_znode *zn, *zi, *zp;
 	int i, keep, move, appending = 0;
-	union ubifs_key *key = &zbr->key;
+	union ubifs_key *key = &zbr->key, *key1;
 
 	ubifs_assert(n >= 0 && n <= c->fanout);
 
@@ -1716,20 +2003,33 @@ again:
 	zn->level = znode->level;
 
 	/* Decide where to split */
-	if (znode->level == 0 && n == c->fanout &&
-	    key_type(c, key) == UBIFS_DATA_KEY) {
-		union ubifs_key *key1;
-
-		/*
-		 * If this is an inode which is being appended - do not split
-		 * it because no other zbranches can be inserted between
-		 * zbranches of consecutive data nodes anyway.
-		 */
-		key1 = &znode->zbranch[n - 1].key;
-		if (key_inum(c, key1) == key_inum(c, key) &&
-		    key_type(c, key1) == UBIFS_DATA_KEY &&
-		    key_block(c, key1) == key_block(c, key) - 1)
-			appending = 1;
+	if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
+		/* Try not to split consecutive data keys */
+		if (n == c->fanout) {
+			key1 = &znode->zbranch[n - 1].key;
+			if (key_inum(c, key1) == key_inum(c, key) &&
+			    key_type(c, key1) == UBIFS_DATA_KEY)
+				appending = 1;
+		} else
+			goto check_split;
+	} else if (appending && n != c->fanout) {
+		/* Try not to split consecutive data keys */
+		appending = 0;
+check_split:
+		if (n >= (c->fanout + 1) / 2) {
+			key1 = &znode->zbranch[0].key;
+			if (key_inum(c, key1) == key_inum(c, key) &&
+			    key_type(c, key1) == UBIFS_DATA_KEY) {
+				key1 = &znode->zbranch[n].key;
+				if (key_inum(c, key1) != key_inum(c, key) ||
+				    key_type(c, key1) != UBIFS_DATA_KEY) {
+					keep = n;
+					move = c->fanout - keep;
+					zi = znode;
+					goto do_split;
+				}
+			}
+		}
 	}
 
 	if (appending) {
@@ -1759,6 +2059,8 @@ again:
 			zbr->znode->parent = zn;
 	}
 
+do_split:
+
 	__set_bit(DIRTY_ZNODE, &zn->flags);
 	atomic_long_inc(&c->dirty_zn_cnt);
 
@@ -1785,14 +2087,11 @@ again:
 
 	/* Insert new znode (produced by spitting) into the parent */
 	if (zp) {
-		i = n;
+		if (n == 0 && zi == znode && znode->iip == 0)
+			correct_parent_keys(c, znode);
+
 		/* Locate insertion point */
 		n = znode->iip + 1;
-		if (appending && n != c->fanout)
-			appending = 0;
-
-		if (i == 0 && zi == znode && znode->iip == 0)
-			correct_parent_keys(c, znode);
 
 		/* Tail recursion */
 		zbr->key = zn->zbranch[0].key;
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index a25c1cc1f8d..b48db999903 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 	}
 
 	/* Make sure the key of the read node is correct */
-	key_read(c, key, &key1);
-	if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) {
+	key_read(c, node + UBIFS_KEY_OFFSET, &key1);
+	if (!keys_eq(c, key, &key1)) {
 		ubifs_err("bad key in node at LEB %d:%d",
 			  zbr->lnum, zbr->offs);
 		dbg_tnc("looked for key %s found node's key %s",
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index a9ecbd9af20..0b378042a3a 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -75,7 +75,6 @@
  */
 #define UBIFS_BLOCK_SIZE  4096
 #define UBIFS_BLOCK_SHIFT 12
-#define UBIFS_BLOCK_MASK  0x00000FFF
 
 /* UBIFS padding byte pattern (must not be first or last byte of node magic) */
 #define UBIFS_PADDING_BYTE 0xCE
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 17c620b93ee..a7bd32fa15b 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -142,6 +142,9 @@
 /* Maximum expected tree height for use by bottom_up_buf */
 #define BOTTOM_UP_HEIGHT 64
 
+/* Maximum number of data nodes to bulk-read */
+#define UBIFS_MAX_BULK_READ 32
+
 /*
  * Lockdep classes for UBIFS inode @ui_mutex.
  */
@@ -328,9 +331,10 @@ struct ubifs_gced_idx_leb {
  *               this inode
  * @dirty: non-zero if the inode is dirty
  * @xattr: non-zero if this is an extended attribute inode
+ * @bulk_read: non-zero if bulk-read should be used
  * @ui_mutex: serializes inode write-back with the rest of VFS operations,
- *            serializes "clean <-> dirty" state changes, protects @dirty,
- *            @ui_size, and @xattr_size
+ *            serializes "clean <-> dirty" state changes, serializes bulk-read,
+ *            protects @dirty, @bulk_read, @ui_size, and @xattr_size
  * @ui_lock: protects @synced_i_size
  * @synced_i_size: synchronized size of inode, i.e. the value of inode size
  *                 currently stored on the flash; used only for regular file
@@ -338,6 +342,8 @@ struct ubifs_gced_idx_leb {
  * @ui_size: inode size used by UBIFS when writing to flash
  * @flags: inode flags (@UBIFS_COMPR_FL, etc)
  * @compr_type: default compression type used for this inode
+ * @last_page_read: page number of last page read (for bulk read)
+ * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
  * @data_len: length of the data attached to the inode
  * @data: inode's data
  *
@@ -379,12 +385,15 @@ struct ubifs_inode {
 	unsigned int xattr_names;
 	unsigned int dirty:1;
 	unsigned int xattr:1;
+	unsigned int bulk_read:1;
 	struct mutex ui_mutex;
 	spinlock_t ui_lock;
 	loff_t synced_i_size;
 	loff_t ui_size;
 	int flags;
 	int compr_type;
+	pgoff_t last_page_read;
+	pgoff_t read_in_a_row;
 	int data_len;
 	void *data;
 };
@@ -698,8 +707,8 @@ struct ubifs_jhead {
  * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
  * @key: key
  * @znode: znode address in memory
- * @lnum: LEB number of the indexing node
- * @offs: offset of the indexing node within @lnum
+ * @lnum: LEB number of the target node (indexing node or data node)
+ * @offs: target node offset within @lnum
  * @len: target node length
  */
 struct ubifs_zbranch {
@@ -744,6 +753,28 @@ struct ubifs_znode {
 };
 
 /**
+ * struct bu_info - bulk-read information
+ * @key: first data node key
+ * @zbranch: zbranches of data nodes to bulk read
+ * @buf: buffer to read into
+ * @buf_len: buffer length
+ * @gc_seq: GC sequence number to detect races with GC
+ * @cnt: number of data nodes for bulk read
+ * @blk_cnt: number of data blocks including holes
+ * @eof: end of file reached
+ */
+struct bu_info {
+	union ubifs_key key;
+	struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
+	void *buf;
+	int buf_len;
+	int gc_seq;
+	int cnt;
+	int blk_cnt;
+	int eof;
+};
+
+/**
  * struct ubifs_node_range - node length range description data structure.
  * @len: fixed node length
  * @min_len: minimum possible node length
@@ -862,9 +893,13 @@ struct ubifs_orphan {
 /**
  * struct ubifs_mount_opts - UBIFS-specific mount options information.
  * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
+ * @bulk_read: enable bulk-reads
+ * @chk_data_crc: check CRCs when reading data nodes
  */
 struct ubifs_mount_opts {
 	unsigned int unmount_mode:2;
+	unsigned int bulk_read:2;
+	unsigned int chk_data_crc:2;
 };
 
 /**
@@ -905,13 +940,12 @@ struct ubifs_mount_opts {
  * @cmt_state: commit state
  * @cs_lock: commit state lock
  * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
+ *
  * @fast_unmount: do not run journal commit before un-mounting
  * @big_lpt: flag that LPT is too big to write whole during commit
- * @check_lpt_free: flag that indicates LPT GC may be needed
- * @nospace: non-zero if the file-system does not have flash space (used as
- *           optimization)
- * @nospace_rp: the same as @nospace, but additionally means that even reserved
- *              pool is full
+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
+ *                   recovery)
+ * @bulk_read: enable bulk-reads
  *
  * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
  *             @calc_idx_sz
@@ -935,6 +969,7 @@ struct ubifs_mount_opts {
  * @mst_node: master node
  * @mst_offs: offset of valid master node
  * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
+ * @bulk_read_buf_size: buffer size for bulk-reads
  *
  * @log_lebs: number of logical eraseblocks in the log
  * @log_bytes: log size in bytes
@@ -977,12 +1012,17 @@ struct ubifs_mount_opts {
  *                        but which still have to be taken into account because
  *                        the index has not been committed so far
  * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
- *              @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst;
+ *              @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
+ *              @nospace, and @nospace_rp;
  * @min_idx_lebs: minimum number of LEBs required for the index
  * @old_idx_sz: size of index on flash
  * @calc_idx_sz: temporary variable which is used to calculate new index size
  *               (contains accurate new index size at end of TNC commit start)
  * @lst: lprops statistics
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ *           optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ *              pool is full
  *
  * @page_budget: budget for a page
  * @inode_budget: budget for an inode
@@ -1061,6 +1101,7 @@ struct ubifs_mount_opts {
  * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
  * @dirty_nn_cnt: number of dirty nnodes
  * @dirty_pn_cnt: number of dirty pnodes
+ * @check_lpt_free: flag that indicates LPT GC may be needed
  * @lpt_sz: LPT size
  * @lpt_nod_buf: buffer for an on-flash nnode or pnode
  * @lpt_buf: buffer of LEB size used by LPT
@@ -1102,6 +1143,7 @@ struct ubifs_mount_opts {
  * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
  * @size_tree: inode size information for recovery
  * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
+ * @always_chk_crc: always check CRCs (while mounting and remounting rw)
  * @mount_opts: UBIFS-specific mount options
  *
  * @dbg_buf: a buffer of LEB size used for debugging purposes
@@ -1146,11 +1188,11 @@ struct ubifs_info {
 	int cmt_state;
 	spinlock_t cs_lock;
 	wait_queue_head_t cmt_wq;
+
 	unsigned int fast_unmount:1;
 	unsigned int big_lpt:1;
-	unsigned int check_lpt_free:1;
-	unsigned int nospace:1;
-	unsigned int nospace_rp:1;
+	unsigned int no_chk_data_crc:1;
+	unsigned int bulk_read:1;
 
 	struct mutex tnc_mutex;
 	struct ubifs_zbranch zroot;
@@ -1175,6 +1217,7 @@ struct ubifs_info {
 	struct ubifs_mst_node *mst_node;
 	int mst_offs;
 	struct mutex mst_mutex;
+	int bulk_read_buf_size;
 
 	int log_lebs;
 	long long log_bytes;
@@ -1218,6 +1261,8 @@ struct ubifs_info {
 	unsigned long long old_idx_sz;
 	unsigned long long calc_idx_sz;
 	struct ubifs_lp_stats lst;
+	unsigned int nospace:1;
+	unsigned int nospace_rp:1;
 
 	int page_budget;
 	int inode_budget;
@@ -1294,6 +1339,7 @@ struct ubifs_info {
 	int lpt_drty_flgs;
 	int dirty_nn_cnt;
 	int dirty_pn_cnt;
+	int check_lpt_free;
 	long long lpt_sz;
 	void *lpt_nod_buf;
 	void *lpt_buf;
@@ -1335,6 +1381,7 @@ struct ubifs_info {
 	struct ubifs_mst_node *rcvrd_mst_node;
 	struct rb_root size_tree;
 	int remounting_rw;
+	int always_chk_crc;
 	struct ubifs_mount_opts mount_opts;
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
@@ -1347,6 +1394,12 @@ struct ubifs_info {
 	unsigned long fail_timeout;
 	unsigned int fail_cnt;
 	unsigned int fail_cnt_max;
+	long long chk_lpt_sz;
+	long long chk_lpt_sz2;
+	long long chk_lpt_wastage;
+	int chk_lpt_lebs;
+	int new_nhead_lnum;
+	int new_nhead_offs;
 #endif
 };
 
@@ -1377,7 +1430,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
 		     int offs, int dtype);
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet);
+		     int offs, int quiet, int chk_crc);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
 int ubifs_io_init(struct ubifs_info *c);
@@ -1490,6 +1543,8 @@ void destroy_old_idx(struct ubifs_info *c);
 int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
 		       int lnum, int offs);
 int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
 
 /* tnc_misc.c */
 struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
@@ -1586,12 +1641,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c);
 void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
 
 /* lprops.c */
-void ubifs_get_lprops(struct ubifs_info *c);
 const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 					   const struct ubifs_lprops *lp,
 					   int free, int dirty, int flags,
 					   int idx_gc_cnt);
-void ubifs_release_lprops(struct ubifs_info *c);
 void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
 void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
 		      int cat);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 649bec78b64..cfd31e229c8 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		int type;
 
 		xent = ubifs_tnc_next_ent(c, &key, &nm);
-		if (unlikely(IS_ERR(xent))) {
+		if (IS_ERR(xent)) {
 			err = PTR_ERR(xent);
 			break;
 		}
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 7efe1000f99..cee97f14af3 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
+#define TIF_FREEZE		18	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -95,6 +96,7 @@ struct thread_info {
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 7ebcc56a222..36107661185 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,15 +18,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-/****************************************************************************/
-/*
- * some bits needed for parts of the IDE subsystem to compile
- */
-#define __ide_mm_insw(port, addr, n)	insw((unsigned long) (port), addr, n)
-#define __ide_mm_insl(port, addr, n)	insl((unsigned long) (port), addr, n)
-#define __ide_mm_outsw(port, addr, n)	outsw((unsigned long) (port), addr, n)
-#define __ide_mm_outsl(port, addr, n)	outsl((unsigned long) (port), addr, n)
-
+#include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IDE_H */
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 0f6dabd4b51..12c07c1866b 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -41,7 +41,7 @@ extern void warn_slowpath(const char *file, const int line,
 #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
 #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...) __WARN()
+#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
 #endif
 
 #ifndef WARN_ON
diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 71ef3f0b968..89061c1a67d 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -84,12 +84,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	{
-		BCD_TO_BIN(time->tm_sec);
-		BCD_TO_BIN(time->tm_min);
-		BCD_TO_BIN(time->tm_hour);
-		BCD_TO_BIN(time->tm_mday);
-		BCD_TO_BIN(time->tm_mon);
-		BCD_TO_BIN(time->tm_year);
+		time->tm_sec = bcd2bin(time->tm_sec);
+		time->tm_min = bcd2bin(time->tm_min);
+		time->tm_hour = bcd2bin(time->tm_hour);
+		time->tm_mday = bcd2bin(time->tm_mday);
+		time->tm_mon = bcd2bin(time->tm_mon);
+		time->tm_year = bcd2bin(time->tm_year);
 	}
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -159,12 +159,12 @@ static inline int set_rtc_time(struct rtc_time *time)
 
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
 	    || RTC_ALWAYS_BCD) {
-		BIN_TO_BCD(sec);
-		BIN_TO_BCD(min);
-		BIN_TO_BCD(hrs);
-		BIN_TO_BCD(day);
-		BIN_TO_BCD(mon);
-		BIN_TO_BCD(yrs);
+		sec = bin2bcd(sec);
+		min = bin2bcd(min);
+		hrs = bin2bcd(hrs);
+		day = bin2bcd(day);
+		mon = bin2bcd(mon);
+		yrs = bin2bcd(yrs);
 	}
 
 	save_control = CMOS_READ(RTC_CONTROL);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 74c5faf26c0..80744606bad 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -37,6 +37,13 @@
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define MCOUNT_REC()	VMLINUX_SYMBOL(__start_mcount_loc) = .; \
+			*(__mcount_loc)				\
+			VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+#else
+#define MCOUNT_REC()
+#endif
 
 /* .data section */
 #define DATA_DATA							\
@@ -52,7 +59,10 @@
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;
+	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	*(__tracepoints)						\
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -61,6 +71,7 @@
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
+		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
@@ -188,6 +199,7 @@
 	/* __*init sections */						\
 	__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {		\
 		*(.ref.rodata)						\
+		MCOUNT_REC()						\
 		DEV_KEEP(init.rodata)					\
 		DEV_KEEP(exit.rodata)					\
 		CPU_KEEP(init.rodata)					\
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 1daf6cbdd9f..b996a3c8cff 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -92,15 +92,6 @@
 #define outsw_swapw(port, addr, n)	raw_outsw_swapw((u16 *)port, addr, n)
 #endif
 
-
-/* Q40 and Atari have byteswapped IDE busses and since many interesting
- * values in the identification string are text, chars and words they
- * happened to be almost correct without swapping.. However *_capacity
- * is needed for drives over 8 GB. RZ */
-#if defined(CONFIG_Q40) || defined(CONFIG_ATARI)
-#define M68K_IDE_SWAPW  (MACH_IS_Q40 || MACH_IS_ATARI)
-#endif
-
 #ifdef CONFIG_BLK_DEV_FALCON_IDE
 #define IDE_ARCH_LOCK
 
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index abc002798a2..af0fda46e94 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -52,5 +52,6 @@ struct thread_info {
 #define TIF_DELAYED_TRACE	14	/* single step a syscall */
 #define TIF_SYSCALL_TRACE	15	/* syscall trace active */
 #define TIF_MEMDIE		16
+#define TIF_FREEZE		17	/* thread is freezing for suspend */
 
 #endif	/* _ASM_M68K_THREAD_INFO_H */
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index e07e72846c7..62274ab9471 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE	 	5
 #define TIF_SYSCALL_AUDIT	6
 #define TIF_RESTORE_SIGMASK	7
+#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -77,5 +78,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #endif
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d76a0839abe..ef1d72dbdfe 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -40,8 +40,6 @@ extern void generic_apic_probe(void);
 extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
-extern int ioapic_force;
-
 extern int disable_apic;
 /*
  * Basic functions accessing APICs.
@@ -100,6 +98,20 @@ extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+static inline int x2apic_enabled(void)
+{
+	int msr, msr2;
+
+	if (!cpu_has_x2apic)
+		return 0;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (msr & X2APIC_ENABLE)
+		return 1;
+	return 0;
+}
+#else
+#define x2apic_enabled()	0
 #endif
 
 struct apic_ops {
diff --git a/include/asm-x86/bigsmp/apic.h b/include/asm-x86/bigsmp/apic.h
index 0a9cd7c5ca0..1d9543b9d35 100644
--- a/include/asm-x86/bigsmp/apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -9,22 +9,17 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-/* Round robin the irqs amoung the online cpus */
 static inline cpumask_t target_cpus(void)
 {
-	static unsigned long cpu = NR_CPUS;
-	do {
-		if (cpu >= NR_CPUS)
-			cpu = first_cpu(cpu_online_map);
-		else
-			cpu = next_cpu(cpu, cpu_online_map);
-	} while (cpu >= NR_CPUS);
-	return cpumask_of_cpu(cpu);
+#ifdef CONFIG_SMP
+        return cpu_online_map;
+#else
+        return cpumask_of_cpu(0);
+#endif
 }
 
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0
-#define TARGET_CPUS		(target_cpus())
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target proc */
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index ed2de22e870..313438e6334 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -94,4 +94,17 @@ extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
+#ifndef CONFIG_EFI
+/*
+ * If EFI is not configured, have the EFI calls return -ENOSYS.
+ */
+#define efi_call0(_f)					(-ENOSYS)
+#define efi_call1(_f, _a1)				(-ENOSYS)
+#define efi_call2(_f, _a1, _a2)				(-ENOSYS)
+#define efi_call3(_f, _a1, _a2, _a3)			(-ENOSYS)
+#define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)
+#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)
+#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS)
+#endif /* CONFIG_EFI */
+
 #endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h
index aae50c2fb30..380f0b4f17e 100644
--- a/include/asm-x86/es7000/apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -17,7 +17,6 @@ static inline cpumask_t target_cpus(void)
 	return cpumask_of_cpu(smp_processor_id());
 #endif
 }
-#define TARGET_CPUS	(target_cpus())
 
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 #define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
@@ -81,7 +80,7 @@ static inline void setup_apic_routing(void)
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
 	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index be0e004ad14..1bb6f9bbe1a 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -7,6 +7,16 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * A "call mcount" instruction is encoded as "e8 <4 byte offset>".
+	 * @addr points to the 4 byte offset, but the caller of this function
+	 * wants the pointer to the e8 opcode byte. Simply subtract one.
+	 */
+	return addr - 1;
+}
 #endif
 
 #endif /* CONFIG_FTRACE */
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 34280f02766..6fe4f81bfcf 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -57,6 +57,7 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	cpumask_t (*vector_allocation_domain)(int cpu);
 
 #ifdef CONFIG_SMP
 	/* ipi */
@@ -104,6 +105,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(vector_allocation_domain)			\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index cbbbb6d4dd3..58b273f6ef0 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__HPET_H
 #define ASM_X86__HPET_H
 
+#include <linux/msi.h>
+
 #ifdef CONFIG_HPET_TIMER
 
 #define HPET_MMAP_SIZE		1024
@@ -10,6 +12,11 @@
 #define HPET_CFG		0x010
 #define HPET_STATUS		0x020
 #define HPET_COUNTER		0x0f0
+/* Per-timer register offsets: timer n's block starts at 0x100 + 0x20 * n */
+#define HPET_Tn_CFG(n)		(0x100 + 0x20 * (n))
+#define HPET_Tn_CMP(n)		(0x108 + 0x20 * (n))
+#define HPET_Tn_ROUTE(n)	(0x110 + 0x20 * (n))
+
 #define HPET_T0_CFG		0x100
 #define HPET_T0_CMP		0x108
 #define HPET_T0_ROUTE		0x110
@@ -65,6 +72,20 @@ extern void hpet_disable(void);
 extern unsigned long hpet_readl(unsigned long a);
 extern void force_hpet_resume(void);
 
+extern void hpet_msi_unmask(unsigned int irq);
+extern void hpet_msi_mask(unsigned int irq);
+extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
+extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+
+#ifdef CONFIG_PCI_MSI
+extern int arch_setup_hpet_msi(unsigned int irq);
+#else
+static inline int arch_setup_hpet_msi(unsigned int irq)
+{
+	return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_HPET_EMULATE_RTC
 
 #include <linux/interrupt.h>
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 50f6e0316b5..749d042f055 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -96,13 +96,8 @@ extern asmlinkage void qic_call_function_interrupt(void);
 
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_spurious_interrupt(void);
-extern asmlinkage void smp_error_interrupt(void);
-#endif
 #ifdef CONFIG_X86_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
@@ -115,13 +110,13 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_IRQS])(void);
-#else
+extern void (*const interrupt[NR_VECTORS])(void);
+#endif
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
 extern void unlock_vector_lock(void);
 extern void __setup_vector_irq(int cpu);
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ec68a50cf1..d35cbd7aa58 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
@@ -87,24 +88,8 @@ struct IO_APIC_route_entry {
 		mask		:  1,	/* 0: enabled, 1: disabled */
 		__reserved_2	: 15;
 
-#ifdef CONFIG_X86_32
-	union {
-		struct {
-			__u32	__reserved_1	: 24,
-				physical_dest	:  4,
-				__reserved_2	:  4;
-		} physical;
-
-		struct {
-			__u32	__reserved_1	: 24,
-				logical_dest	:  8;
-		} logical;
-	} dest;
-#else
 	__u32	__reserved_3	: 24,
 		dest		:  8;
-#endif
-
 } __attribute__ ((packed));
 
 struct IR_IO_APIC_route_entry {
@@ -203,10 +188,17 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
+extern int probe_nr_irqs(void);
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void) { }
+
+static inline int probe_nr_irqs(void)
+{
+	return NR_IRQS;
+}
 #endif
 
 #endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index c5d2d767a1f..a8d065d85f5 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -19,19 +19,14 @@
 
 /*
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration on 64 bit.
+ * cleanup after irq migration.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#ifdef CONFIG_X86_32
-#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR)
-#else
 #define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
-#endif
 #define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
@@ -96,11 +91,7 @@
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
-#ifdef CONFIG_X86_32
-# define FIRST_DEVICE_VECTOR	0x31
-#else
-# define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-#endif
+#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
 
 #define NR_VECTORS		256
 
@@ -116,7 +107,6 @@
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
-# define NR_IRQ_VECTORS NR_IRQS
 
 #elif !defined(CONFIG_X86_VOYAGER)
 
@@ -124,23 +114,15 @@
 
 #  define NR_IRQS		224
 
-#  if (224 >= 32 * NR_CPUS)
-#   define NR_IRQ_VECTORS	NR_IRQS
-#  else
-#   define NR_IRQ_VECTORS	(32 * NR_CPUS)
-#  endif
-
 # else /* IO_APIC || PARAVIRT */
 
 #  define NR_IRQS		16
-#  define NR_IRQ_VECTORS	NR_IRQS
 
 # endif
 
 #else /* !VISWS && !VOYAGER */
 
 # define NR_IRQS		224
-# define NR_IRQ_VECTORS		NR_IRQS
 
 #endif /* VISWS */
 
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index 9283b60a1dd..6b1add8e31d 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 #endif
 
 /*
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 2a330a41b3d..3c66f2cdaec 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -85,6 +85,20 @@ static inline int apicid_to_node(int logical_apicid)
 	return 0;
 #endif
 }
+
+static inline cpumask_t vector_allocation_domain(int cpu)
+{
+        /* Careful. Some cpus do not strictly honor the set of cpus
+         * specified in the interrupt destination when using lowest
+         * priority interrupt delivery mode.
+         *
+         * In particular there was a hyperthreading cpu observed to
+         * deliver interrupts to the wrong hyperthread when only one
+         * hyperthread was specified in the interrupt destination.
+         */
+        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+        return domain;
+}
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -138,6 +152,5 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 static inline void enable_apic_mode(void)
 {
 }
-
 #endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
deleted file mode 100644
index f7870e1a220..00000000000
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 5d010c6881d..5085b52da30 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
 
diff --git a/include/asm-x86/numaq/apic.h b/include/asm-x86/numaq/apic.h
index a8344ba6ea1..0bf2a06b7a4 100644
--- a/include/asm-x86/numaq/apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -12,8 +12,6 @@ static inline cpumask_t target_cpus(void)
 	return CPU_MASK_ALL;
 }
 
-#define TARGET_CPUS (target_cpus())
-
 #define NO_BALANCE_IRQ (1)
 #define esr_disable (1)
 
diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h
index 394b00bb5e7..9b3070f1c2a 100644
--- a/include/asm-x86/summit/apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -22,7 +22,6 @@ static inline cpumask_t target_cpus(void)
 	 */
 	return cpumask_of_cpu(0);
 }
-#define TARGET_CPUS	(target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
deleted file mode 100644
index 890ce3f5e09..00000000000
--- a/include/asm-x86/summit/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 7cd6d7ec130..215f1969c26 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -2,9 +2,7 @@
 #define ASM_X86__UV__BIOS_H
 
 /*
- * BIOS layer definitions.
- *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * UV BIOS layer definitions.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,50 +17,78 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
 #include <linux/rtc.h>
 
-#define BIOS_FREQ_BASE			0x01000001
+/*
+ * Values for the BIOS calls.  It is passed as the first argument in the
+ * BIOS call.  Passing any other value in the first argument will result
+ * in a BIOS_STATUS_UNIMPLEMENTED return status.
+ */
+enum uv_bios_cmd {
+	UV_BIOS_COMMON,
+	UV_BIOS_GET_SN_INFO,
+	UV_BIOS_FREQ_BASE
+};
 
+/*
+ * Status values returned from a BIOS call.
+ */
 enum {
-	BIOS_FREQ_BASE_PLATFORM = 0,
-	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
-	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+	BIOS_STATUS_SUCCESS		=  0,
+	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
+	BIOS_STATUS_EINVAL		= -EINVAL,
+	BIOS_STATUS_UNAVAIL		= -EBUSY
 };
 
-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)		\
-	do {								\
-		/* XXX - the real call goes here */			\
-		result.status = BIOS_STATUS_UNIMPLEMENTED;		\
-		isrv.v0 = 0;						\
-		isrv.v1 = 0;						\
-	} while (0)
+/*
+ * The UV system table describes specific firmware
+ * capabilities available to the Linux kernel at runtime.
+ */
+struct uv_systab {
+	char signature[4];	/* must be "UVST" */
+	u32 revision;		/* distinguish different firmware revs */
+	u64 function;		/* BIOS runtime callback function ptr */
+};
 
 enum {
-	BIOS_STATUS_SUCCESS		=  0,
-	BIOS_STATUS_UNIMPLEMENTED	= -1,
-	BIOS_STATUS_EINVAL		= -2,
-	BIOS_STATUS_ERROR		= -3
+	BIOS_FREQ_BASE_PLATFORM = 0,
+	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
-struct uv_bios_retval {
-	/*
-	 * A zero status value indicates call completed without error.
-	 * A negative status value indicates reason of call failure.
-	 * A positive status value indicates success but an
-	 * informational value should be printed (e.g., "reboot for
-	 * change to take effect").
-	 */
-	s64 status;
-	u64 v0;
-	u64 v1;
-	u64 v2;
+union partition_info_u {
+	u64	val;
+	struct {
+		u64	hub_version	:  8,
+			partition_id	: 16,
+			coherence_id	: 16,
+			region_size	: 24;
+	};
 };
 
-extern long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info);
-extern const char *x86_bios_strerror(long status);
+/*
+ * bios calls have 6 parameters
+ */
+extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_freq_base(u64, u64 *);
+
+extern void uv_bios_init(void);
+
+extern int uv_type;
+extern long sn_partition_id;
+extern long uv_coherency_id;
+extern long uv_region_size;
+#define partition_coherence_id()	(uv_coherency_id)
+
+extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
 #endif /* ASM_X86__UV__BIOS_H */
diff --git a/include/asm-x86/uv/uv_irq.h b/include/asm-x86/uv/uv_irq.h
new file mode 100644
index 00000000000..8bf5f32da9c
--- /dev/null
+++ b/include/asm-x86/uv/uv_irq.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef ASM_X86__UV__UV_IRQ_H
+#define ASM_X86__UV__UV_IRQ_H
+
+/* If a generic version of this structure gets defined, eliminate this one. */
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+
+extern struct irq_chip uv_irq_chip;
+
+extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern void arch_disable_uv_irq(int, unsigned long);
+
+extern int uv_setup_irq(char *, int, int, unsigned long);
+extern void uv_teardown_irq(unsigned int, int, unsigned long);
+
+#endif /* ASM_X86__UV__UV_IRQ_H */
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index 7e4131dd546..0f4fe1faf9b 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		5
 #define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE		17	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -142,6 +143,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index bf9aca548f1..e531783e5d7 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -183,6 +183,7 @@ unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
 unifdef-y += blktrace_api.h
+unifdef-y += byteorder.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
@@ -340,6 +341,7 @@ unifdef-y += soundcard.h
 unifdef-y += stat.h
 unifdef-y += stddef.h
 unifdef-y += string.h
+unifdef-y += swab.h
 unifdef-y += synclink.h
 unifdef-y += sysctl.h
 unifdef-y += tcp.h
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f2518141de8..f7df1eefc10 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -10,7 +10,6 @@
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
@@ -18,10 +17,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
-static inline int pci_find_aer_capability(struct pci_dev *dev)
-{
-	return 0;
-}
 static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb..bee52abb8a4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
  * BDI_CAP_READ_MAP:       Can be mapped for reading
  * BDI_CAP_WRITE_MAP:      Can be mapped for writing
  * BDI_CAP_EXEC_MAP:       Can be mapped for execution
+ *
+ * BDI_CAP_SWAP_BACKED:    Count shmem/tmpfs objects as swap-backed.
  */
 #define BDI_CAP_NO_ACCT_DIRTY	0x00000001
 #define BDI_CAP_NO_WRITEBACK	0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_WRITE_MAP	0x00000020
 #define BDI_CAP_EXEC_MAP	0x00000040
 #define BDI_CAP_NO_ACCT_WB	0x00000080
+#define BDI_CAP_SWAP_BACKED	0x00000100
 
 #define BDI_CAP_VMFLAGS \
 	(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
 				      BDI_CAP_NO_WRITEBACK));
 }
 
+static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
+{
+	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
 	return bdi_cap_account_dirty(mapping->backing_dev_info);
 }
 
+static inline bool mapping_cap_swap_backed(struct address_space *mapping)
+{
+	return bdi_cap_swap_backed(mapping->backing_dev_info);
+}
+
 #endif		/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 7ac518e3c15..22ea563ba3e 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -1,12 +1,3 @@
-/* Permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-/* macros to translate to/from binary and binary-coded decimal (frequently
- * found in RTC chips).
- */
-
 #ifndef _BCD_H
 #define _BCD_H
 
@@ -15,11 +6,4 @@
 unsigned bcd2bin(unsigned char val) __attribute_const__;
 unsigned char bin2bcd(unsigned val) __attribute_const__;
 
-#define BCD2BIN(val)	bcd2bin(val)
-#define BIN2BCD(val)	bin2bcd(val)
-
-/* backwards compat */
-#define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
-#define BIN_TO_BCD(val) ((val)=BIN2BCD(val))
-
 #endif /* _BCD_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 5379913aca5..a08c33a26ca 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int nr_bits);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
 			unsigned long *dst, int nbits);
 extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index eadaab44015..3ce64b90118 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 
 static inline int trylock_buffer(struct buffer_head *bh)
 {
-	return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+	return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
 }
 
 static inline void lock_buffer(struct buffer_head *bh)
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 1133d5f9d81..fbaa7f9cee3 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,3 +1,4 @@
 unifdef-y += big_endian.h
 unifdef-y += little_endian.h
 unifdef-y += swab.h
+unifdef-y += swabb.h
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 44f95b92393..1cba3f3efe5 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)(__u32)(x))
 #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 4cc170a3176..cedc1b5a289 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
 #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4bfaa..8b00f6643e9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,12 +9,12 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
+#include <linux/rwsem.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -137,6 +137,15 @@ struct cgroup {
 	 * release_list_lock
 	 */
 	struct list_head release_list;
+
+	/* pids_mutex protects the fields below */
+	struct rw_semaphore pids_mutex;
+	/* Array of process ids in the cgroup */
+	pid_t *tasks_pids;
+	/* How many files are using the current tasks_pids array */
+	int pids_use_count;
+	/* Length of the current tasks_pids array */
+	int pids_length;
 };
 
 /* A css_set is a structure holding pointers to a set of
@@ -149,7 +158,7 @@ struct cgroup {
 struct css_set {
 
 	/* Reference count */
-	struct kref ref;
+	atomic_t refcount;
 
 	/*
 	 * List running through all cgroup groups in the same hash
@@ -394,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
+void cgroup_mm_owner_callbacks(struct task_struct *old,
+			       struct task_struct *new);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -412,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 	return -EINVAL;
 }
 
+static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
+					     struct task_struct *new) {}
+
 #endif /* !CONFIG_CGROUPS */
 
-#ifdef CONFIG_MM_OWNER
-extern void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
-#else /* !CONFIG_MM_OWNER */
-static inline void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-}
-#endif /* CONFIG_MM_OWNER */
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index e2877454ec8..9c22396e8b5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -48,3 +48,9 @@ SUBSYS(devices)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_FREEZER
+SUBSYS(freezer)
+#endif
+
+/* */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 55e434feec9..f88d32f8ff7 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -45,7 +45,8 @@ struct clocksource;
  * @read:		returns a cycle value
  * @mask:		bitmask for two's complement
  *			subtraction of non 64 bit counters
- * @mult:		cycle to nanosecond multiplier
+ * @mult:		cycle to nanosecond multiplier (adjusted by NTP)
+ * @mult_orig:		cycle to nanosecond multiplier (unadjusted by NTP)
  * @shift:		cycle to nanosecond divisor (power of two)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
@@ -63,6 +64,7 @@ struct clocksource {
 	cycle_t (*read)(void);
 	cycle_t mask;
 	u32 mult;
+	u32 mult_orig;
 	u32 shift;
 	unsigned long flags;
 	cycle_t (*vread)(void);
@@ -77,6 +79,7 @@ struct clocksource {
 	/* timekeeping specific data, ignore */
 	cycle_t cycle_interval;
 	u64	xtime_interval;
+	u32	raw_interval;
 	/*
 	 * Second part is written at each timer interrupt
 	 * Keep it in a different cache line to dirty no
@@ -85,6 +88,7 @@ struct clocksource {
 	cycle_t cycle_last ____cacheline_aligned_in_smp;
 	u64 xtime_nsec;
 	s64 error;
+	struct timespec raw_time;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
@@ -201,17 +205,19 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 {
 	u64 tmp;
 
-	/* XXX - All of this could use a whole lot of optimization */
+	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = length_nsec;
 	tmp <<= c->shift;
-	tmp += c->mult/2;
-	do_div(tmp, c->mult);
+	tmp += c->mult_orig/2;
+	do_div(tmp, c->mult_orig);
 
 	c->cycle_interval = (cycle_t)tmp;
 	if (c->cycle_interval == 0)
 		c->cycle_interval = 1;
 
+	/* Go back from cycles -> shifted ns, this time use ntp adjusted mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
+	c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
 }
 
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8322141ee48..98115d9d04d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # error Sorry, your compiler is too old/not recognized.
 #endif
 
+#define notrace __attribute__((no_instrument_function))
+
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
  */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 025e4f57510..0acf3b737e2 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -8,12 +8,9 @@
 #include <linux/proc_fs.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
+#define ELFCORE_ADDR_ERR	(-2ULL)
 
-#ifdef CONFIG_PROC_VMCORE
 extern unsigned long long elfcorehdr_addr;
-#else
-static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-#endif
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
@@ -28,10 +25,43 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+/*
+ * is_kdump_kernel() checks whether this kernel is booting after a panic of
+ * previous kernel or not. This is determined by checking if previous kernel
+ * has passed the elf core header address on command line.
+ *
+ * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will
+ * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of
+ * previous kernel.
+ */
+
 static inline int is_kdump_kernel(void)
 {
 	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
 }
+
+/* is_vmcore_usable() checks if the kernel is booting after a panic and
+ * the vmcore region is usable.
+ *
+ * This makes use of the fact that due to alignment -2ULL is not
+ * a valid pointer, much in the vein of IS_ERR(), except
+ * dealing directly with an unsigned long long rather than a pointer.
+ */
+
+static inline int is_vmcore_usable(void)
+{
+	return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+}
+
+/* vmcore_unusable() marks the vmcore as unusable,
+ * without disturbing the logic of is_kdump_kernel()
+ */
+
+static inline void vmcore_unusable(void)
+{
+	if (is_kdump_kernel())
+		elfcorehdr_addr = ELFCORE_ADDR_ERR;
+}
 #else /* !CONFIG_CRASH_DUMP */
 static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index c360c558e59..f1984fc3e06 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units;
 	list_for_each_entry(drhd, &dmar_drhd_units, list)
 
 extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
 extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 807373d467f..bb66feb164b 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz
 #define EFI_GLOBAL_VARIABLE_GUID \
     EFI_GUID(  0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c )
 
+#define UV_SYSTEM_TABLE_GUID \
+    EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -255,6 +258,7 @@ extern struct efi {
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
+	unsigned long uv_systab;	/* UV system table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 159d9b476cd..d14f0291848 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -380,6 +380,8 @@ struct ext3_inode {
 #define EXT3_MOUNT_QUOTA		0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT	0x400000 /* Abort on file data write
+						  * error in ordered mode */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 531ccd5f596..75a81eaf343 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -808,6 +808,7 @@ struct fb_tile_ops {
 struct fb_info {
 	int node;
 	int flags;
+	struct mutex lock;		/* Lock for open/release/ioctl funcs */
 	struct fb_var_screeninfo var;	/* Current var */
 	struct fb_fix_screeninfo fix;	/* Current fix */
 	struct fb_monspecs monspecs;	/* Current Monitor specs */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index deddeedf325..8f225339eee 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FREEZER
 /*
  * Check if a process has been frozen
  */
@@ -39,28 +39,18 @@ static inline void clear_freeze_flag(struct task_struct *p)
 	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
+static inline bool should_send_signal(struct task_struct *p)
+{
+	return !(p->flags & PF_FREEZER_NOSIG);
+}
+
 /*
  * Wake up a frozen process
- *
- * task_lock() is taken to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
  */
-static inline int thaw_process(struct task_struct *p)
-{
-	task_lock(p);
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
-	clear_freeze_flag(p);
-	task_unlock(p);
-	return 0;
-}
+extern int __thaw_process(struct task_struct *p);
+
+/* Takes and releases task alloc lock using task_lock() */
+extern int thaw_process(struct task_struct *p);
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
@@ -75,6 +65,15 @@ static inline int try_to_freeze(void)
 		return 0;
 }
 
+extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern void cancel_freezing(struct task_struct *p);
+
+#ifdef CONFIG_CGROUP_FREEZER
+extern int cgroup_frozen(struct task_struct *task);
+#else /* !CONFIG_CGROUP_FREEZER */
+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
+#endif /* !CONFIG_CGROUP_FREEZER */
+
 /*
  * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
  * calls wait_for_completion(&vfork) and reset right after it returns from this
@@ -166,7 +165,7 @@ static inline void set_freezable_with_signal(void)
 	} while (try_to_freeze());					\
 	__retval;							\
 })
-#else /* !CONFIG_PM_SLEEP */
+#else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
@@ -191,6 +190,6 @@ static inline void set_freezable_with_signal(void) {}
 #define wait_event_freezable_timeout(wq, condition, timeout)		\
 		wait_event_interruptible_timeout(wq, condition, timeout)
 
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !CONFIG_FREEZER */
 
 #endif	/* FREEZER_H_INCLUDED */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index bb384068272..a3d46151be1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,10 +1,14 @@
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-#ifdef CONFIG_FTRACE
-
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/ktime.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#ifdef CONFIG_FTRACE
 
 extern int ftrace_enabled;
 extern int
@@ -36,6 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
+static inline void ftrace_kill_atomic(void) { }
 #endif /* CONFIG_FTRACE */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -76,8 +81,10 @@ extern void mcount_call(void);
 
 extern int skip_trace(unsigned long ip);
 
-void ftrace_disable_daemon(void);
-void ftrace_enable_daemon(void);
+extern void ftrace_release(void *start, unsigned long size);
+
+extern void ftrace_disable_daemon(void);
+extern void ftrace_enable_daemon(void);
 
 #else
 # define skip_trace(ip)				({ 0; })
@@ -85,6 +92,7 @@ void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)	do { } while (0)
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
@@ -98,9 +106,11 @@ static inline void tracer_disable(void)
 #endif
 }
 
-/* Ftrace disable/restore without lock. Some synchronization mechanism
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
  * must be used to prevent ftrace_enabled to be changed between
- * disable/restore. */
+ * disable/restore.
+ */
 static inline int __ftrace_enabled_save(void)
 {
 #ifdef CONFIG_FTRACE
@@ -157,9 +167,71 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt: the printf format for printing
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving ftrace_printks scattered around in
+ * your code.
+ */
+# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
+extern int
+__ftrace_printk(unsigned long ip, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void ftrace_dump(void);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+static inline int
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+
+static inline int
+ftrace_printk(const char *fmt, ...)
+{
+	return 0;
+}
+static inline void ftrace_dump(void) { }
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern void ftrace_init(void);
+extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+#else
+static inline void ftrace_init(void) { }
+static inline void
+ftrace_init_module(unsigned long *start, unsigned long *end) { }
+#endif
+
+
+struct boot_trace {
+	pid_t			caller;
+	char			func[KSYM_NAME_LEN];
+	int			result;
+	unsigned long long	duration;		/* usecs */
+	ktime_t			calltime;
+	ktime_t			rettime;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
+extern void start_boot_trace(void);
+extern void stop_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
+static inline void start_boot_trace(void) { }
+static inline void stop_boot_trace(void) { }
+#endif
+
+
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 265635dc990..350fe9767bb 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -17,8 +17,14 @@
  *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
  *  - add blksize field to fuse_attr
  *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
  */
 
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
 #include <asm/types.h>
 #include <linux/major.h>
 
@@ -26,7 +32,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 9
+#define FUSE_KERNEL_MINOR_VERSION 10
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -98,9 +104,11 @@ struct fuse_file_lock {
  *
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
+#define FOPEN_NONSEEKABLE	(1 << 2)
 
 /**
  * INIT request/reply flags
@@ -409,3 +417,5 @@ struct fuse_dirent {
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+#endif /* _LINUX_FUSE_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2f245fe63bd..9a4e35cd5f7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -125,12 +125,12 @@ struct hrtimer {
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
-	enum hrtimer_cb_mode		cb_mode;
 	struct list_head		cb_entry;
+	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
+	int				start_pid;
 	void				*start_site;
 	char				start_comm[16];
-	int				start_pid;
 #endif
 };
 
@@ -155,10 +155,8 @@ struct hrtimer_sleeper {
  * @first:		pointer to the timer node which expires first
  * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
- * @get_softirq_time:	function to retrieve the current time from the softirq
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
- * @reprogram:		function to reprogram the timer event
  */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
@@ -167,13 +165,9 @@ struct hrtimer_clock_base {
 	struct rb_node		*first;
 	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
-	ktime_t			(*get_softirq_time)(void);
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
-	int			(*reprogram)(struct hrtimer *t,
-					     struct hrtimer_clock_base *b,
-					     ktime_t n);
 #endif
 };
 
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
new file mode 100644
index 00000000000..fb604dcd38f
--- /dev/null
+++ b/include/linux/i2c/twl4030.h
@@ -0,0 +1,343 @@
+/*
+ * twl4030.h - header for TWL4030 PM and audio CODEC device
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#ifndef __TWL4030_H_
+#define __TWL4030_H_
+
+/*
+ * Using the twl4030 core we address registers using a pair
+ *	{ module id, relative register offset }
+ * which that core then maps to the relevant
+ *	{ i2c slave, absolute register address }
+ *
+ * The module IDs are meaningful only to the twl4030 core code,
+ * which uses them as array indices to look up the first register
+ * address each module uses within a given i2c slave.
+ */
+
+/* Slave 0 (i2c address 0x48) */
+#define TWL4030_MODULE_USB		0x00
+
+/* Slave 1 (i2c address 0x49) */
+#define TWL4030_MODULE_AUDIO_VOICE	0x01
+#define TWL4030_MODULE_GPIO		0x02
+#define TWL4030_MODULE_INTBR		0x03
+#define TWL4030_MODULE_PIH		0x04
+#define TWL4030_MODULE_TEST		0x05
+
+/* Slave 2 (i2c address 0x4a) */
+#define TWL4030_MODULE_KEYPAD		0x06
+#define TWL4030_MODULE_MADC		0x07
+#define TWL4030_MODULE_INTERRUPTS	0x08
+#define TWL4030_MODULE_LED		0x09
+#define TWL4030_MODULE_MAIN_CHARGE	0x0A
+#define TWL4030_MODULE_PRECHARGE	0x0B
+#define TWL4030_MODULE_PWM0		0x0C
+#define TWL4030_MODULE_PWM1		0x0D
+#define TWL4030_MODULE_PWMA		0x0E
+#define TWL4030_MODULE_PWMB		0x0F
+
+/* Slave 3 (i2c address 0x4b) */
+#define TWL4030_MODULE_BACKUP		0x10
+#define TWL4030_MODULE_INT		0x11
+#define TWL4030_MODULE_PM_MASTER	0x12
+#define TWL4030_MODULE_PM_RECEIVER	0x13
+#define TWL4030_MODULE_RTC		0x14
+#define TWL4030_MODULE_SECURED_REG	0x15
+
+/*
+ * Read and write single 8-bit registers
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
+int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
+
+/*
+ * Read and write several 8-bit registers at once.
+ *
+ * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
+ * for the value, and populate your data starting at offset 1.
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * NOTE:  at up to 1024 registers, this is a big chip.
+ *
+ * Put register declarations into a driver-private file rather than
+ * into this file, unless some of the registers in a block
+ * need to be shared with other drivers.  One example is blocks that
+ * have Secondary IRQ Handler (SIH) registers.
+ */
+
+#define TWL4030_SIH_CTRL_EXCLEN_MASK	BIT(0)
+#define TWL4030_SIH_CTRL_PENDDIS_MASK	BIT(1)
+#define TWL4030_SIH_CTRL_COR_MASK	BIT(2)
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * GPIO Block Register offsets (use TWL4030_MODULE_GPIO)
+ */
+
+#define REG_GPIODATAIN1			0x0
+#define REG_GPIODATAIN2			0x1
+#define REG_GPIODATAIN3			0x2
+#define REG_GPIODATADIR1		0x3
+#define REG_GPIODATADIR2		0x4
+#define REG_GPIODATADIR3		0x5
+#define REG_GPIODATAOUT1		0x6
+#define REG_GPIODATAOUT2		0x7
+#define REG_GPIODATAOUT3		0x8
+#define REG_CLEARGPIODATAOUT1		0x9
+#define REG_CLEARGPIODATAOUT2		0xA
+#define REG_CLEARGPIODATAOUT3		0xB
+#define REG_SETGPIODATAOUT1		0xC
+#define REG_SETGPIODATAOUT2		0xD
+#define REG_SETGPIODATAOUT3		0xE
+#define REG_GPIO_DEBEN1			0xF
+#define REG_GPIO_DEBEN2			0x10
+#define REG_GPIO_DEBEN3			0x11
+#define REG_GPIO_CTRL			0x12
+#define REG_GPIOPUPDCTR1		0x13
+#define REG_GPIOPUPDCTR2		0x14
+#define REG_GPIOPUPDCTR3		0x15
+#define REG_GPIOPUPDCTR4		0x16
+#define REG_GPIOPUPDCTR5		0x17
+#define REG_GPIO_ISR1A			0x19
+#define REG_GPIO_ISR2A			0x1A
+#define REG_GPIO_ISR3A			0x1B
+#define REG_GPIO_IMR1A			0x1C
+#define REG_GPIO_IMR2A			0x1D
+#define REG_GPIO_IMR3A			0x1E
+#define REG_GPIO_ISR1B			0x1F
+#define REG_GPIO_ISR2B			0x20
+#define REG_GPIO_ISR3B			0x21
+#define REG_GPIO_IMR1B			0x22
+#define REG_GPIO_IMR2B			0x23
+#define REG_GPIO_IMR3B			0x24
+#define REG_GPIO_EDR1			0x28
+#define REG_GPIO_EDR2			0x29
+#define REG_GPIO_EDR3			0x2A
+#define REG_GPIO_EDR4			0x2B
+#define REG_GPIO_EDR5			0x2C
+#define REG_GPIO_SIH_CTRL		0x2D
+
+/* Up to 18 signals are available as GPIOs, when their
+ * pins are not assigned to another use (such as ULPI/USB).
+ */
+#define TWL4030_GPIO_MAX		18
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Keypad register offsets (use TWL4030_MODULE_KEYPAD)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_KEYPAD_KEYP_ISR1	0x11
+#define TWL4030_KEYPAD_KEYP_IMR1	0x12
+#define TWL4030_KEYPAD_KEYP_ISR2	0x13
+#define TWL4030_KEYPAD_KEYP_IMR2	0x14
+#define TWL4030_KEYPAD_KEYP_SIR		0x15	/* test register */
+#define TWL4030_KEYPAD_KEYP_EDR		0x16
+#define TWL4030_KEYPAD_KEYP_SIH_CTRL	0x17
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Multichannel ADC register offsets (use TWL4030_MODULE_MADC)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_MADC_ISR1		0x61
+#define TWL4030_MADC_IMR1		0x62
+#define TWL4030_MADC_ISR2		0x63
+#define TWL4030_MADC_IMR2		0x64
+#define TWL4030_MADC_SIR		0x65	/* test register */
+#define TWL4030_MADC_EDR		0x66
+#define TWL4030_MADC_SIH_CTRL		0x67
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS)
+ */
+
+#define TWL4030_INTERRUPTS_BCIISR1A	0x0
+#define TWL4030_INTERRUPTS_BCIISR2A	0x1
+#define TWL4030_INTERRUPTS_BCIIMR1A	0x2
+#define TWL4030_INTERRUPTS_BCIIMR2A	0x3
+#define TWL4030_INTERRUPTS_BCIISR1B	0x4
+#define TWL4030_INTERRUPTS_BCIISR2B	0x5
+#define TWL4030_INTERRUPTS_BCIIMR1B	0x6
+#define TWL4030_INTERRUPTS_BCIIMR2B	0x7
+#define TWL4030_INTERRUPTS_BCISIR1	0x8	/* test register */
+#define TWL4030_INTERRUPTS_BCISIR2	0x9	/* test register */
+#define TWL4030_INTERRUPTS_BCIEDR1	0xa
+#define TWL4030_INTERRUPTS_BCIEDR2	0xb
+#define TWL4030_INTERRUPTS_BCIEDR3	0xc
+#define TWL4030_INTERRUPTS_BCISIHCTRL	0xd
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Power Interrupt block register offsets (use TWL4030_MODULE_INT)
+ */
+
+#define TWL4030_INT_PWR_ISR1		0x0
+#define TWL4030_INT_PWR_IMR1		0x1
+#define TWL4030_INT_PWR_ISR2		0x2
+#define TWL4030_INT_PWR_IMR2		0x3
+#define TWL4030_INT_PWR_SIR		0x4	/* test register */
+#define TWL4030_INT_PWR_EDR1		0x5
+#define TWL4030_INT_PWR_EDR2		0x6
+#define TWL4030_INT_PWR_SIH_CTRL	0x7
+
+/*----------------------------------------------------------------------*/
+
+struct twl4030_bci_platform_data {
+	int *battery_tmp_tbl;
+	unsigned int tblsize;
+};
+
+/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */
+struct twl4030_gpio_platform_data {
+	int		gpio_base;
+	unsigned	irq_base, irq_end;
+
+	/* package the two LED signals as output-only GPIOs? */
+	bool		use_leds;
+
+	/* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
+	u8		mmc_cd;
+
+	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
+	 * should be enabled.  Else, if that bit is set in "pulldowns",
+	 * that pulldown is enabled.  Don't waste power by letting any
+	 * digital inputs float...
+	 */
+	u32		pullups;
+	u32		pulldowns;
+
+	int		(*setup)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+	int		(*teardown)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+};
+
+struct twl4030_madc_platform_data {
+	int		irq_line;
+};
+
+struct twl4030_keypad_data {
+	int rows;
+	int cols;
+	int *keymap;
+	int irq;
+	unsigned int keymapsize;
+	unsigned int rep:1;
+};
+
+enum twl4030_usb_mode {
+	T2_USB_MODE_ULPI = 1,
+	T2_USB_MODE_CEA2011_3PIN = 2,
+};
+
+struct twl4030_usb_data {
+	enum twl4030_usb_mode	usb_mode;
+};
+
+struct twl4030_platform_data {
+	unsigned				irq_base, irq_end;
+	struct twl4030_bci_platform_data	*bci;
+	struct twl4030_gpio_platform_data	*gpio;
+	struct twl4030_madc_platform_data	*madc;
+	struct twl4030_keypad_data		*keypad;
+	struct twl4030_usb_data			*usb;
+
+	/* REVISIT more to come ... _nothing_ should be hard-wired */
+};
+
+/*----------------------------------------------------------------------*/
+
+int twl4030_sih_setup(int module);
+
+/*
+ * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the
+ * IRQ data to subsidiary devices using platform device resources.
+ */
+
+/* IRQ information - the IRQ base definitions are needed */
+#include <mach/irqs.h>
+/* TWL4030 interrupts */
+
+/* #define TWL4030_MODIRQ_GPIO		(TWL4030_IRQ_BASE + 0) */
+#define TWL4030_MODIRQ_KEYPAD		(TWL4030_IRQ_BASE + 1)
+#define TWL4030_MODIRQ_BCI		(TWL4030_IRQ_BASE + 2)
+#define TWL4030_MODIRQ_MADC		(TWL4030_IRQ_BASE + 3)
+/* #define TWL4030_MODIRQ_USB		(TWL4030_IRQ_BASE + 4) */
+/* #define TWL4030_MODIRQ_PWR		(TWL4030_IRQ_BASE + 5) */
+
+#define TWL4030_PWRIRQ_PWRBTN		(TWL4030_PWR_IRQ_BASE + 0)
+/* #define TWL4030_PWRIRQ_CHG_PRES		(TWL4030_PWR_IRQ_BASE + 1) */
+/* #define TWL4030_PWRIRQ_USB_PRES		(TWL4030_PWR_IRQ_BASE + 2) */
+/* #define TWL4030_PWRIRQ_RTC		(TWL4030_PWR_IRQ_BASE + 3) */
+/* #define TWL4030_PWRIRQ_HOT_DIE		(TWL4030_PWR_IRQ_BASE + 4) */
+/* #define TWL4030_PWRIRQ_PWROK_TIMEOUT	(TWL4030_PWR_IRQ_BASE + 5) */
+/* #define TWL4030_PWRIRQ_MBCHG		(TWL4030_PWR_IRQ_BASE + 6) */
+/* #define TWL4030_PWRIRQ_SC_DETECT	(TWL4030_PWR_IRQ_BASE + 7) */
+
+/* Rest are unused currently */
+
+/* Offsets to Power Registers */
+#define TWL4030_VDAC_DEV_GRP		0x3B
+#define TWL4030_VDAC_DEDICATED		0x3E
+#define TWL4030_VAUX1_DEV_GRP		0x17
+#define TWL4030_VAUX1_DEDICATED		0x1A
+#define TWL4030_VAUX2_DEV_GRP		0x1B
+#define TWL4030_VAUX2_DEDICATED		0x1E
+#define TWL4030_VAUX3_DEV_GRP		0x1F
+#define TWL4030_VAUX3_DEDICATED		0x22
+
+/* TWL4030 GPIO interrupt definitions */
+
+#define TWL4030_GPIO_IRQ_NO(n)		(TWL4030_GPIO_IRQ_BASE + (n))
+
+/*
+ * Exported TWL4030 GPIO APIs
+ *
+ * WARNING -- use standard GPIO and IRQ calls instead; these will vanish.
+ */
+int twl4030_set_gpio_debounce(int gpio, int enable);
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+	extern int twl4030charger_usb_en(int enable);
+#else
+	static inline int twl4030charger_usb_en(int enable) { return 0; }
+#endif
+
+#endif /* End of __TWL4030_H_ */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c47e371554c..89e53cfbc78 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -461,12 +461,26 @@ struct ide_acpi_drive_link;
 struct ide_acpi_hwif_link;
 #endif
 
+struct ide_drive_s;
+
+struct ide_disk_ops {
+	int		(*check)(struct ide_drive_s *, const char *);
+	int		(*get_capacity)(struct ide_drive_s *);
+	void		(*setup)(struct ide_drive_s *);
+	void		(*flush)(struct ide_drive_s *);
+	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
+	int		(*set_doorlock)(struct ide_drive_s *, struct gendisk *,
+					int);
+	ide_startstop_t	(*do_request)(struct ide_drive_s *, struct request *,
+				      sector_t);
+	int		(*end_request)(struct ide_drive_s *, int, int);
+	int		(*ioctl)(struct ide_drive_s *, struct inode *,
+				 struct file *, unsigned int, unsigned long);
+};
+
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
-	IDE_AFLAG_MEDIA_CHANGED		= (1 << 1),
-	/* Drive cannot lock the door. */
-	IDE_AFLAG_NO_DOORLOCK		= (1 << 2),
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
@@ -498,14 +512,10 @@ enum {
 	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
 
 	/* ide-floppy */
-	/* Format in progress */
-	IDE_AFLAG_FORMAT_IN_PROGRESS	= (1 << 18),
 	/* Avoid commands not supported in Clik drive */
 	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
 	/* Requires BH algorithm for packets */
 	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
-	/* Write protect */
-	IDE_AFLAG_WP			= (1 << 21),
 	/* Supports format progress report */
 	IDE_AFLAG_SRFP			= (1 << 22),
 
@@ -578,7 +588,11 @@ enum {
 	/* don't unload heads */
 	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28)
+	IDE_DFLAG_PARKED		= (1 << 28),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	/* write protect */
+	IDE_DFLAG_WP			= (1 << 30),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
 };
 
 struct ide_drive_s {
@@ -597,6 +611,8 @@ struct ide_drive_s {
 #endif
 	struct hwif_s		*hwif;	/* actually (ide_hwif_t *) */
 
+	const struct ide_disk_ops *disk_ops;
+
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
@@ -1123,8 +1139,8 @@ struct ide_driver_s {
 	void		(*resume)(ide_drive_t *);
 	void		(*shutdown)(ide_drive_t *);
 #ifdef CONFIG_IDE_PROC_FS
-	ide_proc_entry_t		*proc;
-	const struct ide_proc_devset	*settings;
+	ide_proc_entry_t *		(*proc_entries)(ide_drive_t *);
+	const struct ide_proc_devset *	(*proc_devsets)(ide_drive_t *);
 #endif
 };
 
diff --git a/include/linux/init.h b/include/linux/init.h
index ad63824460e..0c1264668be 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,7 +40,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text) __cold
+#define __init		__section(.init.text) __cold notrace
 #define __initdata	__section(.init.data)
 #define __initconst	__section(.init.rodata)
 #define __exitdata	__section(.exit.data)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35a61dc60d5..f58a0cf8929 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -8,6 +8,7 @@
 #include <linux/preempt.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/irqflags.h>
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8d9411bc60f..d058c57be02 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/errno.h>
 
 #include <asm/irq.h>
@@ -152,6 +153,7 @@ struct irq_chip {
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+	unsigned int		irq;
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -170,7 +172,7 @@ struct irq_desc {
 	cpumask_t		affinity;
 	unsigned int		cpu;
 #endif
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_t		pending_mask;
 #endif
 #ifdef CONFIG_PROC_FS
@@ -179,8 +181,14 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
+
 extern struct irq_desc irq_desc[NR_IRQS];
 
+static inline struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < nr_irqs) ? irq_desc + irq : NULL;
+}
+
 /*
  * Migration helpers for obsolete names, they will go away:
  */
@@ -198,19 +206,15 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
-#ifndef handle_dynamic_tick
-# define handle_dynamic_tick(a)		do { } while (0)
-#endif
-
 #ifdef CONFIG_SMP
 
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 
 void set_pending_irq(unsigned int irq, cpumask_t mask);
 void move_native_irq(int irq);
 void move_masked_irq(int irq);
 
-#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
+#else /* CONFIG_GENERIC_PENDING_IRQ */
 
 static inline void move_irq(int irq)
 {
@@ -237,19 +241,14 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_IRQBALANCE
-extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
-#else
-static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-}
-#endif
-
 extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
 {
-	return irq_desc[irq].status & IRQ_NO_BALANCING_MASK;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	return desc->status & IRQ_NO_BALANCING_MASK;
 }
 
 /* Handle irq action chains: */
@@ -279,10 +278,8 @@ extern unsigned int __do_IRQ(unsigned int irq);
  * irqchip-style controller then we call the ->handle_irq() handler,
  * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
  */
-static inline void generic_handle_irq(unsigned int irq)
+static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc = irq_desc + irq;
-
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
 #else
@@ -293,6 +290,11 @@ static inline void generic_handle_irq(unsigned int irq)
 #endif
 }
 
+static inline void generic_handle_irq(unsigned int irq)
+{
+	generic_handle_irq_desc(irq, irq_to_desc(irq));
+}
+
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
 			   int action_ret);
@@ -325,7 +327,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 static inline void __set_irq_handler_unlocked(int irq,
 					      irq_flow_handler_t handler)
 {
-	irq_desc[irq].handle_irq = handler;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->handle_irq = handler;
 }
 
 /*
@@ -353,13 +358,14 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
+extern unsigned int create_irq_nr(unsigned int irq_want);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	return desc->action != NULL;
 }
 
@@ -374,10 +380,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
-#define get_irq_chip(irq)	(irq_desc[irq].chip)
-#define get_irq_chip_data(irq)	(irq_desc[irq].chip_data)
-#define get_irq_data(irq)	(irq_desc[irq].handler_data)
-#define get_irq_msi(irq)	(irq_desc[irq].msi_desc)
+#define get_irq_chip(irq)	(irq_to_desc(irq)->chip)
+#define get_irq_chip_data(irq)	(irq_to_desc(irq)->chip_data)
+#define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
+#define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
new file mode 100644
index 00000000000..3171ddc3b39
--- /dev/null
+++ b/include/linux/irqnr.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_IRQNR_H
+#define _LINUX_IRQNR_H
+
+#ifndef CONFIG_GENERIC_HARDIRQS
+#include <asm/irq.h>
+# define nr_irqs		NR_IRQS
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)			\
+	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
+	     irq > 0; irq--, desc--)
+#endif
+
+#define for_each_irq_nr(irq)			\
+	for (irq = 0; irq < nr_irqs; irq++)
+
+#endif
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7ebbcb1c9ba..35d4f6342fa 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -816,6 +816,9 @@ struct journal_s
 #define JFS_FLUSHED	0x008	/* The journal superblock has been flushed */
 #define JFS_LOADED	0x010	/* The journal superblock has been loaded */
 #define JFS_BARRIER	0x020	/* Use IDE barriers */
+#define JFS_ABORT_ON_SYNCDATA_ERR	0x040  /* Abort the journal on file
+						* data write error in ordered
+						* mode */
 
 /*
  * Function declarations for the journaling transaction and buffer
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5a566b705ca..94d17ff64c5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -496,4 +496,9 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
+#endif
+
 #endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9..4a145caeee0 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -39,19 +39,34 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+struct irq_desc;
+
+static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
+					    struct irq_desc *desc)
+{
+	kstat_this_cpu.irqs[irq]++;
+}
+
+static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+       return kstat_cpu(cpu).irqs[irq];
+}
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-static inline int kstat_irqs(int irq)
+static inline unsigned int kstat_irqs(unsigned int irq)
 {
-	int cpu, sum = 0;
+	unsigned int sum = 0;
+	int cpu;
 
 	for_each_possible_cpu(cpu)
-		sum += kstat_cpu(cpu).irqs[irq];
+		sum += kstat_irqs_cpu(irq, cpu);
 
 	return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0be7795655f..497b1d1f7a0 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,6 +29,7 @@
  *		<jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> added function-return probes.
  */
+#include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -47,7 +48,7 @@
 #define KPROBE_HIT_SSDONE	0x00000008
 
 /* Attach to insert probes on any functions which should be ignored*/
-#define __kprobes	__attribute__((__section__(".kprobes.text")))
+#define __kprobes	__attribute__((__section__(".kprobes.text"))) notrace
 
 struct kprobe;
 struct pt_regs;
@@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 
 #else /* CONFIG_KPROBES */
 
-#define __kprobes	/**/
+#define __kprobes	notrace
 struct jprobe;
 struct kretprobe;
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 947cf84e555..c261aa0584b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -340,6 +340,9 @@ enum {
 
 	ATA_EHI_DID_RESET	= ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET,
 
+	/* mask of flags to transfer *to* the slave link */
+	ATA_EHI_TO_SLAVE_MASK	= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET,
+
 	/* max tries if error condition is still set after ->error_handler */
 	ATA_EH_MAX_TRIES	= 5,
 
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 56ba3739465..9fd1f859021 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/linkage.h>
 
-#define notrace __attribute__((no_instrument_function))
-
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 1290653f924..889196c7fbb 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe,
 extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
 	int num);
 
+/*
+ * marker_synchronize_unregister must be called between the last marker probe
+ * unregistration and the end of module exit to make sure there is no caller
+ * executing a probe when it is freed.
+ */
+#define marker_synchronize_unregister() synchronize_sched()
+
 #endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fdf3967e139..1fbe14d3952 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,16 +27,13 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-#define page_reset_bad_cgroup(page)	((page)->page_cgroup = 0)
-
-extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
+extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -44,7 +41,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active);
+					int active, int file);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
@@ -69,21 +66,11 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 
-extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
-extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
-
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline void page_reset_bad_cgroup(struct page *page)
-{
-}
+extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru);
 
-static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return NULL;
-}
 
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline int mem_cgroup_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
@@ -159,14 +146,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	return 0;
-}
-
-static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
+static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
+					struct zone *zone, int priority,
+					enum lru_list lru)
 {
 	return 0;
 }
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
new file mode 100644
index 00000000000..cad314c1243
--- /dev/null
+++ b/include/linux/mfd/da903x.h
@@ -0,0 +1,201 @@
+#ifndef __LINUX_PMIC_DA903X_H
+#define __LINUX_PMIC_DA903X_H
+
+/* Unified sub device IDs for DA9030/DA9034 */
+enum {
+	DA9030_ID_LED_1,
+	DA9030_ID_LED_2,
+	DA9030_ID_LED_3,
+	DA9030_ID_LED_4,
+	DA9030_ID_LED_PC,
+	DA9030_ID_VIBRA,
+	DA9030_ID_WLED,
+	DA9030_ID_BUCK1,
+	DA9030_ID_BUCK2,
+	DA9030_ID_LDO1,
+	DA9030_ID_LDO2,
+	DA9030_ID_LDO3,
+	DA9030_ID_LDO4,
+	DA9030_ID_LDO5,
+	DA9030_ID_LDO6,
+	DA9030_ID_LDO7,
+	DA9030_ID_LDO8,
+	DA9030_ID_LDO9,
+	DA9030_ID_LDO10,
+	DA9030_ID_LDO11,
+	DA9030_ID_LDO12,
+	DA9030_ID_LDO13,
+	DA9030_ID_LDO14,
+	DA9030_ID_LDO15,
+	DA9030_ID_LDO16,
+	DA9030_ID_LDO17,
+	DA9030_ID_LDO18,
+	DA9030_ID_LDO19,
+	DA9030_ID_LDO_INT,	/* LDO Internal */
+
+	DA9034_ID_LED_1,
+	DA9034_ID_LED_2,
+	DA9034_ID_VIBRA,
+	DA9034_ID_WLED,
+	DA9034_ID_TOUCH,
+
+	DA9034_ID_BUCK1,
+	DA9034_ID_BUCK2,
+	DA9034_ID_LDO1,
+	DA9034_ID_LDO2,
+	DA9034_ID_LDO3,
+	DA9034_ID_LDO4,
+	DA9034_ID_LDO5,
+	DA9034_ID_LDO6,
+	DA9034_ID_LDO7,
+	DA9034_ID_LDO8,
+	DA9034_ID_LDO9,
+	DA9034_ID_LDO10,
+	DA9034_ID_LDO11,
+	DA9034_ID_LDO12,
+	DA9034_ID_LDO13,
+	DA9034_ID_LDO14,
+	DA9034_ID_LDO15,
+};
+
+/*
+ * DA9030/DA9034 LED sub-devices use the generic "struct led_info"
+ * as their platform_data
+ */
+
+/* DA9030 flags for "struct led_info"
+ */
+#define DA9030_LED_RATE_ON	(0 << 5)
+#define DA9030_LED_RATE_052S	(1 << 5)
+#define DA9030_LED_DUTY_1_16	(0 << 3)
+#define DA9030_LED_DUTY_1_8	(1 << 3)
+#define DA9030_LED_DUTY_1_4	(2 << 3)
+#define DA9030_LED_DUTY_1_2	(3 << 3)
+
+#define DA9030_VIBRA_MODE_1P3V	(0 << 1)
+#define DA9030_VIBRA_MODE_2P7V	(1 << 1)
+#define DA9030_VIBRA_FREQ_1HZ	(0 << 2)
+#define DA9030_VIBRA_FREQ_2HZ	(1 << 2)
+#define DA9030_VIBRA_FREQ_4HZ	(2 << 2)
+#define DA9030_VIBRA_FREQ_8HZ	(3 << 2)
+#define DA9030_VIBRA_DUTY_ON	(0 << 4)
+#define DA9030_VIBRA_DUTY_75P	(1 << 4)
+#define DA9030_VIBRA_DUTY_50P	(2 << 4)
+#define DA9030_VIBRA_DUTY_25P	(3 << 4)
+
+/* DA9034 flags for "struct led_info" */
+#define DA9034_LED_RAMP		(1 << 7)
+
+/* DA9034 touch screen platform data */
+struct da9034_touch_pdata {
+	int	interval_ms;	/* sampling interval while pen down */
+	int	x_inverted;
+	int	y_inverted;
+};
+
+struct da903x_subdev_info {
+	int		id;
+	const char	*name;
+	void		*platform_data;
+};
+
+struct da903x_platform_data {
+	int num_subdevs;
+	struct da903x_subdev_info *subdevs;
+};
+
+/* bit definitions for DA9030 events */
+#define DA9030_EVENT_ONKEY		(1 << 0)
+#define	DA9030_EVENT_PWREN		(1 << 1)
+#define	DA9030_EVENT_EXTON		(1 << 2)
+#define	DA9030_EVENT_CHDET		(1 << 3)
+#define	DA9030_EVENT_TBAT		(1 << 4)
+#define	DA9030_EVENT_VBATMON		(1 << 5)
+#define	DA9030_EVENT_VBATMON_TXON	(1 << 6)
+#define	DA9030_EVENT_CHIOVER		(1 << 7)
+#define	DA9030_EVENT_TCTO		(1 << 8)
+#define	DA9030_EVENT_CCTO		(1 << 9)
+#define	DA9030_EVENT_ADC_READY		(1 << 10)
+#define	DA9030_EVENT_VBUS_4P4		(1 << 11)
+#define	DA9030_EVENT_VBUS_4P0		(1 << 12)
+#define	DA9030_EVENT_SESS_VALID		(1 << 13)
+#define	DA9030_EVENT_SRP_DETECT		(1 << 14)
+#define	DA9030_EVENT_WATCHDOG		(1 << 15)
+#define	DA9030_EVENT_LDO15		(1 << 16)
+#define	DA9030_EVENT_LDO16		(1 << 17)
+#define	DA9030_EVENT_LDO17		(1 << 18)
+#define	DA9030_EVENT_LDO18		(1 << 19)
+#define	DA9030_EVENT_LDO19		(1 << 20)
+#define	DA9030_EVENT_BUCK2		(1 << 21)
+
+/* bit definitions for DA9034 events */
+#define DA9034_EVENT_ONKEY		(1 << 0)
+#define DA9034_EVENT_EXTON		(1 << 2)
+#define DA9034_EVENT_CHDET		(1 << 3)
+#define DA9034_EVENT_TBAT		(1 << 4)
+#define DA9034_EVENT_VBATMON		(1 << 5)
+#define DA9034_EVENT_REV_IOVER		(1 << 6)
+#define DA9034_EVENT_CH_IOVER		(1 << 7)
+#define DA9034_EVENT_CH_TCTO		(1 << 8)
+#define DA9034_EVENT_CH_CCTO		(1 << 9)
+#define DA9034_EVENT_USB_DEV		(1 << 10)
+#define DA9034_EVENT_OTGCP_IOVER	(1 << 11)
+#define DA9034_EVENT_VBUS_4P55		(1 << 12)
+#define DA9034_EVENT_VBUS_3P8		(1 << 13)
+#define DA9034_EVENT_SESS_1P8		(1 << 14)
+#define DA9034_EVENT_SRP_READY		(1 << 15)
+#define DA9034_EVENT_ADC_MAN		(1 << 16)
+#define DA9034_EVENT_ADC_AUTO4		(1 << 17)
+#define DA9034_EVENT_ADC_AUTO5		(1 << 18)
+#define DA9034_EVENT_ADC_AUTO6		(1 << 19)
+#define DA9034_EVENT_PEN_DOWN		(1 << 20)
+#define DA9034_EVENT_TSI_READY		(1 << 21)
+#define DA9034_EVENT_UART_TX		(1 << 22)
+#define DA9034_EVENT_UART_RX		(1 << 23)
+#define DA9034_EVENT_HEADSET		(1 << 25)
+#define DA9034_EVENT_HOOKSWITCH		(1 << 26)
+#define DA9034_EVENT_WATCHDOG		(1 << 27)
+
+extern int da903x_register_notifier(struct device *dev,
+		struct notifier_block *nb, unsigned int events);
+extern int da903x_unregister_notifier(struct device *dev,
+		struct notifier_block *nb, unsigned int events);
+
+/* Status Query Interface */
+#define DA9030_STATUS_ONKEY		(1 << 0)
+#define DA9030_STATUS_PWREN1		(1 << 1)
+#define DA9030_STATUS_EXTON		(1 << 2)
+#define DA9030_STATUS_CHDET		(1 << 3)
+#define DA9030_STATUS_TBAT		(1 << 4)
+#define DA9030_STATUS_VBATMON		(1 << 5)
+#define DA9030_STATUS_VBATMON_TXON	(1 << 6)
+#define DA9030_STATUS_MCLKDET		(1 << 7)
+
+#define DA9034_STATUS_ONKEY		(1 << 0)
+#define DA9034_STATUS_EXTON		(1 << 2)
+#define DA9034_STATUS_CHDET		(1 << 3)
+#define DA9034_STATUS_TBAT		(1 << 4)
+#define DA9034_STATUS_VBATMON		(1 << 5)
+#define DA9034_STATUS_PEN_DOWN		(1 << 6)
+#define DA9034_STATUS_MCLKDET		(1 << 7)
+#define DA9034_STATUS_USB_DEV		(1 << 8)
+#define DA9034_STATUS_HEADSET		(1 << 9)
+#define DA9034_STATUS_HOOKSWITCH	(1 << 10)
+#define DA9034_STATUS_REMCON		(1 << 11)
+#define DA9034_STATUS_VBUS_VALID_4P55	(1 << 12)
+#define DA9034_STATUS_VBUS_VALID_3P8	(1 << 13)
+#define DA9034_STATUS_SESS_VALID_1P8	(1 << 14)
+#define DA9034_STATUS_SRP_READY		(1 << 15)
+
+extern int da903x_query_status(struct device *dev, unsigned int status);
+
+
+/* NOTE: the functions below are not intended for use outside
+ * of the DA903x sub-device drivers
+ */
+extern int da903x_write(struct device *dev, int reg, uint8_t val);
+extern int da903x_read(struct device *dev, int reg, uint8_t *val);
+extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask);
+extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
+extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
+#endif /* __LINUX_PMIC_DA903X_H */
diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h
index e83c7f2036f..b4629818aea 100644
--- a/include/linux/mfd/t7l66xb.h
+++ b/include/linux/mfd/t7l66xb.h
@@ -15,8 +15,6 @@
 #include <linux/mfd/tmio.h>
 
 struct t7l66xb_platform_data {
-	int (*enable_clk32k)(struct platform_device *dev);
-	void (*disable_clk32k)(struct platform_device *dev);
 	int (*enable)(struct platform_device *dev);
 	int (*disable)(struct platform_device *dev);
 	int (*suspend)(struct platform_device *dev);
diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h
index fa06e0610b8..b4888209494 100644
--- a/include/linux/mfd/tc6387xb.h
+++ b/include/linux/mfd/tc6387xb.h
@@ -11,9 +11,6 @@
 #define MFD_TC6387XB_H
 
 struct tc6387xb_platform_data {
-	int (*enable_clk32k)(struct platform_device *dev);
-	void (*disable_clk32k)(struct platform_device *dev);
-
 	int (*enable)(struct platform_device *dev);
 	int (*disable)(struct platform_device *dev);
 	int (*suspend)(struct platform_device *dev);
diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h
index fec7b3f7a81..626e448205c 100644
--- a/include/linux/mfd/tc6393xb.h
+++ b/include/linux/mfd/tc6393xb.h
@@ -17,12 +17,12 @@
 #ifndef MFD_TC6393XB_H
 #define MFD_TC6393XB_H
 
+#include <linux/fb.h>
+
 /* Also one should provide the CK3P6MI clock */
 struct tc6393xb_platform_data {
 	u16	scr_pll2cr;	/* PLL2 Control */
 	u16	scr_gper;	/* GP Enable */
-	u32	scr_gpo_doecr;	/* GPO Data OE Control */
-	u32	scr_gpo_dsr;	/* GPO Data Set */
 
 	int	(*enable)(struct platform_device *dev);
 	int	(*disable)(struct platform_device *dev);
@@ -31,15 +31,28 @@ struct tc6393xb_platform_data {
 
 	int	irq_base;	/* base for subdevice irqs */
 	int	gpio_base;
+	int	(*setup)(struct platform_device *dev);
+	void	(*teardown)(struct platform_device *dev);
 
 	struct tmio_nand_data	*nand_data;
+	struct tmio_fb_data	*fb_data;
+
+	unsigned resume_restore : 1; /* take special actions
+					to preserve the state
+					on suspend/resume */
 };
 
+extern int tc6393xb_lcd_mode(struct platform_device *fb,
+			     const struct fb_videomode *mode);
+extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on);
+
 /*
  * Relative to irq_base
  */
 #define	IRQ_TC6393_NAND		0
 #define	IRQ_TC6393_MMC		1
+#define	IRQ_TC6393_OHCI		2
+#define	IRQ_TC6393_FB		4
 
 #define	TC6393XB_NR_IRQS	8
 
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 03aea612d28..3f34005068d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,7 +7,6 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
-					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 77323a72dd3..cf9c679ab38 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -132,6 +132,15 @@ enum {
 	MLX4_MAILBOX_SIZE	=  4096
 };
 
+enum {
+	/* set port opcode modifiers */
+	MLX4_SET_PORT_GENERAL   = 0x0,
+	MLX4_SET_PORT_RQP_CALC  = 0x1,
+	MLX4_SET_PORT_MAC_TABLE = 0x2,
+	MLX4_SET_PORT_VLAN_TABLE = 0x3,
+	MLX4_SET_PORT_PRIO_MAP  = 0x4,
+};
+
 struct mlx4_dev;
 
 struct mlx4_cmd_mailbox {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b2f94446831..bd9977b8949 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -60,6 +60,7 @@ enum {
 	MLX4_DEV_CAP_FLAG_IPOIB_CSUM	= 1 <<  7,
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
 	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
+	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
 	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
 	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
 	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
@@ -145,6 +146,29 @@ enum {
 	MLX4_MTT_FLAG_PRESENT		= 1
 };
 
+enum mlx4_qp_region {
+	MLX4_QP_REGION_FW = 0,
+	MLX4_QP_REGION_ETH_ADDR,
+	MLX4_QP_REGION_FC_ADDR,
+	MLX4_QP_REGION_FC_EXCH,
+	MLX4_NUM_QP_REGION
+};
+
+enum mlx4_port_type {
+	MLX4_PORT_TYPE_IB	= 1 << 0,
+	MLX4_PORT_TYPE_ETH	= 1 << 1,
+};
+
+enum mlx4_special_vlan_idx {
+	MLX4_NO_VLAN_IDX        = 0,
+	MLX4_VLAN_MISS_IDX,
+	MLX4_VLAN_REGULAR
+};
+
+enum {
+	MLX4_NUM_FEXCH          = 64 * 1024,
+};
+
 static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 {
 	return (major << 32) | (minor << 16) | subminor;
@@ -154,7 +178,9 @@ struct mlx4_caps {
 	u64			fw_ver;
 	int			num_ports;
 	int			vl_cap[MLX4_MAX_PORTS + 1];
-	int			mtu_cap[MLX4_MAX_PORTS + 1];
+	int			ib_mtu_cap[MLX4_MAX_PORTS + 1];
+	u64			def_mac[MLX4_MAX_PORTS + 1];
+	int			eth_mtu_cap[MLX4_MAX_PORTS + 1];
 	int			gid_table_len[MLX4_MAX_PORTS + 1];
 	int			pkey_table_len[MLX4_MAX_PORTS + 1];
 	int			local_ca_ack_delay;
@@ -169,7 +195,6 @@ struct mlx4_caps {
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
-	int			reserved_qps;
 	int			sqp_start;
 	int			num_srqs;
 	int			max_srq_wqes;
@@ -201,6 +226,15 @@ struct mlx4_caps {
 	u16			stat_rate_support;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
 	int			max_gso_sz;
+	int                     reserved_qps_cnt[MLX4_NUM_QP_REGION];
+	int			reserved_qps;
+	int                     reserved_qps_base[MLX4_NUM_QP_REGION];
+	int                     log_num_macs;
+	int                     log_num_vlans;
+	int                     log_num_prios;
+	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
+	u8			supported_type[MLX4_MAX_PORTS + 1];
+	u32			port_mask;
 };
 
 struct mlx4_buf_list {
@@ -355,6 +389,11 @@ struct mlx4_init_port_param {
 	u64			si_guid;
 };
 
+#define mlx4_foreach_port(port, dev, type)				\
+	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
+		if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \
+		     ~(dev)->caps.port_mask) & 1 << ((port) - 1))
+
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
@@ -400,7 +439,10 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp);
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
@@ -416,6 +458,12 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
 
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
 		      int npages, u64 iova, u32 *lkey, u32 *rkey);
 int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c61ba10768e..ffee2f74341 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -132,6 +132,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
 /*
+ * special vmas that are non-mergeable, non-mlock()able
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
+/*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
  */
@@ -700,10 +705,10 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
+			    struct user_struct *user)
 {
 	return 0;
 }
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 895bc4e9303..c948350c378 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,40 +1,100 @@
-static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
-{
-	list_add(&page->lru, &zone->active_list);
-	__inc_zone_state(zone, NR_ACTIVE);
-}
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
 
-static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns LRU_FILE if @page is a page cache page backed by a regular
+ * filesystem, or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
 {
-	list_add(&page->lru, &zone->inactive_list);
-	__inc_zone_state(zone, NR_INACTIVE);
+	if (PageSwapBacked(page))
+		return 0;
+
+	/* The page is page cache backed by a normal filesystem. */
+	return LRU_FILE;
 }
 
 static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_ACTIVE);
+	list_add(&page->lru, &zone->lru[l].list);
+	__inc_zone_state(zone, NR_LRU_BASE + l);
 }
 
 static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
 	list_del(&page->lru);
-	__dec_zone_state(zone, NR_INACTIVE);
+	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
+	enum lru_list l = LRU_BASE;
+
 	list_del(&page->lru);
-	if (PageActive(page)) {
-		__ClearPageActive(page);
-		__dec_zone_state(zone, NR_ACTIVE);
+	if (PageUnevictable(page)) {
+		__ClearPageUnevictable(page);
+		l = LRU_UNEVICTABLE;
 	} else {
-		__dec_zone_state(zone, NR_INACTIVE);
+		if (PageActive(page)) {
+			__ClearPageActive(page);
+			l += LRU_ACTIVE;
+		}
+		l += page_is_file_cache(page);
+	}
+	__dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+	enum lru_list lru = LRU_BASE;
+
+	if (PageUnevictable(page))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageActive(page))
+			lru += LRU_ACTIVE;
+		lru += page_is_file_cache(page);
 	}
+
+	return lru;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
+#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9d49fa36bbe..fe825471d5a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -94,9 +94,6 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-	unsigned long page_cgroup;
-#endif
 };
 
 /*
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 61d19e1b7a0..139d7c88d9c 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p);
 /* Called from page fault handler. */
 extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
 
-/* Called from ioremap.c */
 #ifdef CONFIG_MMIOTRACE
+/* Called from ioremap.c */
 extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
 							void __iomem *addr);
 extern void mmiotrace_iounmap(volatile void __iomem *addr);
+
+/* For anyone to insert markers. Remember trailing newline. */
+extern int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 2)));
 #else
 static inline void mmiotrace_ioremap(resource_size_t offset,
 					unsigned long size, void __iomem *addr)
@@ -48,15 +52,22 @@ static inline void mmiotrace_ioremap(resource_size_t offset,
 static inline void mmiotrace_iounmap(volatile void __iomem *addr)
 {
 }
-#endif /* CONFIG_MMIOTRACE_HOOKS */
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 0)));
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+{
+	return 0;
+}
+#endif /* CONFIG_MMIOTRACE */
 
 enum mm_io_opcode {
 	MMIO_READ = 0x1,     /* struct mmiotrace_rw */
 	MMIO_WRITE = 0x2,    /* struct mmiotrace_rw */
 	MMIO_PROBE = 0x3,    /* struct mmiotrace_map */
 	MMIO_UNPROBE = 0x4,  /* struct mmiotrace_map */
-	MMIO_MARKER = 0x5,   /* raw char data */
-	MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
+	MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
 };
 
 struct mmiotrace_rw {
@@ -81,5 +92,6 @@ extern void enable_mmiotrace(void);
 extern void disable_mmiotrace(void);
 extern void mmio_trace_rw(struct mmiotrace_rw *rw);
 extern void mmio_trace_mapping(struct mmiotrace_map *map);
+extern int mmio_trace_printk(const char *fmt, va_list args);
 
 #endif /* MMIOTRACE_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 428328a05fa..35a7b5e1946 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -81,21 +81,31 @@ struct zone_padding {
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
-	NR_INACTIVE,
-	NR_ACTIVE,
+	NR_LRU_BASE,
+	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
+	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
+	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
+#else
+	NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+	NR_MLOCK = NR_ACTIVE_FILE,
+#endif
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
-	/* Second 128 byte cacheline */
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
+	/* Second 128 byte cacheline */
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
@@ -107,6 +117,55 @@ enum zone_stat_item {
 #endif
 	NR_VM_ZONE_STAT_ITEMS };
 
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
+enum lru_list {
+	LRU_INACTIVE_ANON = LRU_BASE,
+	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+#ifdef CONFIG_UNEVICTABLE_LRU
+	LRU_UNEVICTABLE,
+#else
+	LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
+	NR_LRU_LISTS
+};
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
+static inline int is_file_lru(enum lru_list l)
+{
+	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_active_lru(enum lru_list l)
+{
+	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_unevictable_lru(enum lru_list l)
+{
+#ifdef CONFIG_UNEVICTABLE_LRU
+	return (l == LRU_UNEVICTABLE);
+#else
+	return 0;
+#endif
+}
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
@@ -251,10 +310,22 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;	
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long		nr_scan_active;
-	unsigned long		nr_scan_inactive;
+	struct {
+		struct list_head list;
+		unsigned long nr_scan;
+	} lru[NR_LRU_LISTS];
+
+	/*
+	 * The pageout code in vmscan.c keeps track of how many of the
+	 * mem/swap backed and file backed pages are referenced.
+	 * The higher the rotated/scanned ratio, the more valuable
+	 * that cache is.
+	 *
+	 * The anon LRU stats live in [0], file LRU stats in [1]
+	 */
+	unsigned long		recent_rotated[2];
+	unsigned long		recent_scanned[2];
+
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
 
@@ -276,6 +347,12 @@ struct zone {
 	 */
 	int prev_priority;
 
+	/*
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU.  Maintained by the pageout code.
+	 */
+	unsigned int inactive_ratio;
+
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
@@ -524,8 +601,11 @@ typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
 	struct zonelist node_zonelists[MAX_ZONELISTS];
 	int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	struct page_cgroup *node_page_cgroup;
+#endif
 #endif
 	struct bootmem_data *bdata;
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -854,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #endif
 
 struct page;
+struct page_cgroup;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -871,6 +952,14 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	/*
+	 * If SPARSEMEM, pgdat doesn't have a page_cgroup pointer. We use the
+	 * section instead. (see memcontrol.h/page_cgroup.h about this.)
+	 */
+	struct page_cgroup *page_cgroup;
+	unsigned long pad;
+#endif
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
diff --git a/include/linux/module.h b/include/linux/module.h
index a41555cbe00..5d2970cdce9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -331,6 +332,10 @@ struct module
 	struct marker *markers;
 	unsigned int num_markers;
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	struct tracepoint *tracepoints;
+	unsigned int num_tracepoints;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
@@ -453,6 +458,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -557,6 +565,15 @@ static inline void module_update_markers(void)
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d6fb115f5a0..ee5124ec319 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -12,6 +12,7 @@
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/cfi_endian.h>
+#include <linux/mtd/xip.h>
 
 #ifdef CONFIG_MTD_CFI_I1
 #define cfi_interleave(cfi) 1
@@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t
 {
 	map_word val;
 	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type);
-
 	val = cfi_build_cmd(cmd, map, cfi);
 
 	if (prev_val)
@@ -483,6 +483,13 @@ static inline void cfi_udelay(int us)
 	}
 }
 
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+			     struct cfi_private *cfi);
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+			     struct cfi_private *cfi);
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+			       struct cfi_private *cfi);
+
 struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
 			     const char* name);
 struct cfi_fixup {
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 08dd131301c..d4f38c5fd44 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -73,6 +73,10 @@ struct flchip {
 	int buffer_write_time;
 	int erase_time;
 
+	int word_write_time_max;
+	int buffer_write_time_max;
+	int erase_time_max;
+
 	void *priv;
 };
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 92263654855..eae26bb6430 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -25,8 +25,10 @@
 #define MTD_ERASE_DONE          0x08
 #define MTD_ERASE_FAILED        0x10
 
+#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+
 /* If the erase fails, fail_addr might indicate exactly which block failed.  If
-   fail_addr = 0xffffffff, the failure was not at the device level or was not
+   fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
    specific to any particular block. */
 struct erase_info {
 	struct mtd_info *mtd;
diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h
new file mode 100644
index 00000000000..51534e50f7f
--- /dev/null
+++ b/include/linux/mtd/nand-gpio.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_MTD_NAND_GPIO_H
+#define __LINUX_MTD_NAND_GPIO_H
+
+#include <linux/mtd/nand.h>
+
+struct gpio_nand_platdata {
+	int	gpio_nce;
+	int	gpio_nwp;
+	int	gpio_cle;
+	int	gpio_ale;
+	int	gpio_rdy;
+	void	(*adjust_parts)(struct gpio_nand_platdata *, size_t);
+	struct mtd_partition *parts;
+	unsigned int num_parts;
+	unsigned int options;
+	int	chip_delay;
+};
+
+#endif
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 81774e5facf..733d3f3b4eb 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -248,6 +248,7 @@ struct nand_hw_control {
  * @read_page_raw:	function to read a raw page without ECC
  * @write_page_raw:	function to write a raw page without ECC
  * @read_page:	function to read a page according to the ecc generator requirements
+ * @read_subpage:	function to read parts of the page covered by ECC.
  * @write_page:	function to write a page according to the ecc generator requirements
  * @read_oob:	function to read chip OOB data
  * @write_oob:	function to write chip OOB data
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index d1b310c92eb..0c6bbe28f38 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -152,6 +152,8 @@
 #define ONENAND_SYS_CFG1_INT		(1 << 6)
 #define ONENAND_SYS_CFG1_IOBE		(1 << 5)
 #define ONENAND_SYS_CFG1_RDY_CONF	(1 << 4)
+#define ONENAND_SYS_CFG1_HF		(1 << 2)
+#define ONENAND_SYS_CFG1_SYNC_WRITE	(1 << 1)
 
 /*
  * Controller Status Register F240h (R)
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 5014f7a9f5d..c92b4d43960 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -73,7 +73,6 @@ struct device;
 struct device_node;
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts);
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
new file mode 100644
index 00000000000..e77c1cea404
--- /dev/null
+++ b/include/linux/mtd/sh_flctl.h
@@ -0,0 +1,125 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __SH_FLCTL_H__
+#define __SH_FLCTL_H__
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+/* FLCTL registers */
+#define FLCMNCR(f)		(f->reg + 0x0)
+#define FLCMDCR(f)		(f->reg + 0x4)
+#define FLCMCDR(f)		(f->reg + 0x8)
+#define FLADR(f)		(f->reg + 0xC)
+#define FLADR2(f)		(f->reg + 0x3C)
+#define FLDATAR(f)		(f->reg + 0x10)
+#define FLDTCNTR(f)		(f->reg + 0x14)
+#define FLINTDMACR(f)		(f->reg + 0x18)
+#define FLBSYTMR(f)		(f->reg + 0x1C)
+#define FLBSYCNT(f)		(f->reg + 0x20)
+#define FLDTFIFO(f)		(f->reg + 0x24)
+#define FLECFIFO(f)		(f->reg + 0x28)
+#define FLTRCR(f)		(f->reg + 0x2C)
+#define	FL4ECCRESULT0(f)	(f->reg + 0x80)
+#define	FL4ECCRESULT1(f)	(f->reg + 0x84)
+#define	FL4ECCRESULT2(f)	(f->reg + 0x88)
+#define	FL4ECCRESULT3(f)	(f->reg + 0x8C)
+#define	FL4ECCCR(f)		(f->reg + 0x90)
+#define	FL4ECCCNT(f)		(f->reg + 0x94)
+#define	FLERRADR(f)		(f->reg + 0x98)
+
+/* FLCMNCR control bits */
+#define ECCPOS2		(0x1 << 25)
+#define _4ECCCNTEN	(0x1 << 24)
+#define _4ECCEN		(0x1 << 23)
+#define _4ECCCORRECT	(0x1 << 22)
+#define SNAND_E		(0x1 << 18)	/* SNAND (0=512 1=2048)*/
+#define QTSEL_E		(0x1 << 17)
+#define ENDIAN		(0x1 << 16)	/* 1 = little endian */
+#define FCKSEL_E	(0x1 << 15)
+#define ECCPOS_00	(0x00 << 12)
+#define ECCPOS_01	(0x01 << 12)
+#define ECCPOS_02	(0x02 << 12)
+#define ACM_SACCES_MODE	(0x01 << 10)
+#define NANWF_E		(0x1 << 9)
+#define SE_D		(0x1 << 8)	/* Spare area disable */
+#define	CE1_ENABLE	(0x1 << 4)	/* Chip Enable 1 */
+#define	CE0_ENABLE	(0x1 << 3)	/* Chip Enable 0 */
+#define	TYPESEL_SET	(0x1 << 0)
+
+/* FLCMDCR control bits */
+#define ADRCNT2_E	(0x1 << 31)	/* 5byte address enable */
+#define ADRMD_E		(0x1 << 26)	/* Sector address access */
+#define CDSRC_E		(0x1 << 25)	/* Data buffer selection */
+#define DOSR_E		(0x1 << 24)	/* Status read check */
+#define SELRW		(0x1 << 21)	/*  0:read 1:write */
+#define DOADR_E		(0x1 << 20)	/* Address stage execute */
+#define ADRCNT_1	(0x00 << 18)	/* Address data bytes: 1byte */
+#define ADRCNT_2	(0x01 << 18)	/* Address data bytes: 2byte */
+#define ADRCNT_3	(0x02 << 18)	/* Address data bytes: 3byte */
+#define ADRCNT_4	(0x03 << 18)	/* Address data bytes: 4byte */
+#define DOCMD2_E	(0x1 << 17)	/* 2nd cmd stage execute */
+#define DOCMD1_E	(0x1 << 16)	/* 1st cmd stage execute */
+
+/* FLTRCR control bits */
+#define TRSTRT		(0x1 << 0)	/* translation start */
+#define TREND		(0x1 << 1)	/* translation end */
+
+/* FL4ECCCR control bits */
+#define	_4ECCFA		(0x1 << 2)	/* 4 symbols correct fault */
+#define	_4ECCEND	(0x1 << 1)	/* 4 symbols end */
+#define	_4ECCEXST	(0x1 << 0)	/* 4 symbols exist */
+
+#define INIT_FL4ECCRESULT_VAL	0x03FF03FF
+#define LOOP_TIMEOUT_MAX	0x00010000
+
+#define mtd_to_flctl(mtd)	container_of(mtd, struct sh_flctl, mtd)
+
+struct sh_flctl {
+	struct mtd_info		mtd;
+	struct nand_chip	chip;
+	void __iomem		*reg;
+
+	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
+	int	read_bytes;
+	int	index;
+	int	seqin_column;		/* column in SEQIN cmd */
+	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
+	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
+	int	erase1_page_addr;	/* page_addr in ERASE1 cmd */
+	uint32_t erase_ADRCNT;		/* bits of FLCMDCR in ERASE1 cmd */
+	uint32_t rw_ADRCNT;	/* bits of FLCMDCR in READ WRITE cmd */
+
+	int	hwecc_cant_correct[4];
+
+	unsigned page_size:1;	/* NAND page size (0 = 512, 1 = 2048) */
+	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
+};
+
+struct sh_flctl_platform_data {
+	struct mtd_partition	*parts;
+	int			nr_parts;
+	unsigned long		flcmncr_val;
+
+	unsigned has_hwecc:1;
+};
+
+#endif	/* __SH_FLCTL_H__ */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ac8d0233b05..4eaa8347a0d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -367,8 +367,12 @@ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
 
 static inline struct rpc_cred *nfs_file_cred(struct file *file)
 {
-	if (file != NULL)
-		return nfs_file_open_context(file)->cred;
+	if (file != NULL) {
+		struct nfs_open_context *ctx =
+			nfs_file_open_context(file);
+		if (ctx)
+			return ctx->cred;
+	}
 	return NULL;
 }
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c74d3e87531..b12f93a3c34 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -93,6 +93,11 @@ enum pageflags {
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_buddy,		/* Page is free, on buddy lists */
+	PG_swapbacked,		/* Page is backed by RAM/swap */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	PG_unevictable,		/* Page is "unevictable"  */
+	PG_mlocked,		/* Page is vma mlocked */
+#endif
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
@@ -161,6 +166,18 @@ static inline int Page##uname(struct page *page) 			\
 #define TESTSCFLAG(uname, lname)					\
 	TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define SETPAGEFLAG_NOOP(uname)						\
+static inline void SetPage##uname(struct page *page) {  }
+
+#define CLEARPAGEFLAG_NOOP(uname)					\
+static inline void ClearPage##uname(struct page *page) {  }
+
+#define __CLEARPAGEFLAG_NOOP(uname)					\
+static inline void __ClearPage##uname(struct page *page) {  }
+
+#define TESTCLEARFLAG_FALSE(uname)					\
+static inline int TestClearPage##uname(struct page *page) { return 0; }
+
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -169,6 +186,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
+	TESTCLEARFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
@@ -176,6 +194,7 @@ PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
+PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
@@ -211,6 +230,25 @@ PAGEFLAG(SwapCache, swapcache)
 PAGEFLAG_FALSE(SwapCache)
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
+	TESTCLEARFLAG(Unevictable, unevictable)
+
+#define MLOCK_PAGES 1
+PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
+	TESTSCFLAG(Mlocked, mlocked)
+
+#else
+
+#define MLOCK_PAGES 0
+PAGEFLAG_FALSE(Mlocked)
+	SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+
+PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
+	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
+	__CLEARPAGEFLAG_NOOP(Unevictable)
+#endif
+
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 PAGEFLAG(Uncached, uncached)
 #else
@@ -326,15 +364,25 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define __PG_UNEVICTABLE	(1 << PG_unevictable)
+#define __PG_MLOCKED		(1 << PG_mlocked)
+#else
+#define __PG_UNEVICTABLE	0
+#define __PG_MLOCKED		0
+#endif
+
 #define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
 			 1 << PG_buddy | 1 << PG_writeback | \
-			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active)
+			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+			 __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked in bad_page().  Pages on the free list should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
+		1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
 
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
@@ -347,7 +395,8 @@ static inline void __ClearPageTail(struct page *page)
  * Pages being prepped should not have these flags set.  It they are, there
  * is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
+		1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif	/* PAGE_FLAGS_H */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
new file mode 100644
index 00000000000..0fd39f2231e
--- /dev/null
+++ b/include/linux/page_cgroup.h
@@ -0,0 +1,103 @@
+#ifndef __LINUX_PAGE_CGROUP_H
+#define __LINUX_PAGE_CGROUP_H
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#include <linux/bit_spinlock.h>
+/*
+ * Page Cgroup can be considered as an extended mem_map.
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup.
+ * All page cgroups are allocated at boot or memory hotplug event,
+ * so the page cgroup for a pfn always exists.
+ */
+struct page_cgroup {
+	unsigned long flags;
+	struct mem_cgroup *mem_cgroup;
+	struct page *page;
+	struct list_head lru;		/* per cgroup LRU list */
+};
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
+void __init page_cgroup_init(void);
+struct page_cgroup *lookup_page_cgroup(struct page *page);
+
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* page cgroup is locked */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	/* flags for LRU placement */
+	PCG_ACTIVE, /* page is active in this cgroup */
+	PCG_FILE, /* page is file system backed */
+	PCG_UNEVICTABLE, /* page is unevictable */
+};
+
+#define TESTPCGFLAG(uname, lname)			\
+static inline int PageCgroup##uname(struct page_cgroup *pc)	\
+	{ return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname)			\
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+	{ set_bit(PCG_##lname, &pc->flags);  }
+
+#define CLEARPCGFLAG(uname, lname)			\
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
+	{ clear_bit(PCG_##lname, &pc->flags);  }
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+TESTPCGFLAG(Used, USED)
+CLEARPCGFLAG(Used, USED)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
+
+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+	return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+	return page_zonenum(pc->page);
+}
+
+static inline void lock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_lock(PCG_LOCK, &pc->flags);
+}
+
+static inline int trylock_page_cgroup(struct page_cgroup *pc)
+{
+	return bit_spin_trylock(PCG_LOCK, &pc->flags);
+}
+
+static inline void unlock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_unlock(PCG_LOCK, &pc->flags);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+struct page_cgroup;
+
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	return NULL;
+}
+#endif
+#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101..709742be02f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -32,6 +32,34 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
 	}
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define AS_UNEVICTABLE	(__GFP_BITS_SHIFT + 2)	/* e.g., ramdisk, SHM_LOCK */
+
+static inline void mapping_set_unevictable(struct address_space *mapping)
+{
+	set_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline void mapping_clear_unevictable(struct address_space *mapping)
+{
+	clear_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	if (unlikely(!mapping))
+		return 0;	/* no mapping: AS_UNEVICTABLE cannot be set */
+	return test_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+#else
+static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline void mapping_clear_unevictable(struct address_space *mapping) { }
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+	return 0;
+}
+#endif
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
@@ -271,19 +299,19 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
-static inline void set_page_locked(struct page *page)
+static inline void __set_page_locked(struct page *page)
 {
-	set_bit(PG_locked, &page->flags);
+	__set_bit(PG_locked, &page->flags);
 }
 
-static inline void clear_page_locked(struct page *page)
+static inline void __clear_page_locked(struct page *page)
 {
-	clear_bit(PG_locked, &page->flags);
+	__clear_bit(PG_locked, &page->flags);
 }
 
 static inline int trylock_page(struct page *page)
 {
-	return !test_and_set_bit(PG_locked, &page->flags);
+	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
 }
 
 /*
@@ -410,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
 
 /*
  * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run set_page_locked() against it.
+ * the page is new, so we can just run __set_page_locked() against it.
  */
 static inline int add_to_page_cache(struct page *page,
 		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
 {
 	int error;
 
-	set_page_locked(page);
+	__set_page_locked(page);
 	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
 	if (unlikely(error))
-		clear_page_locked(page);
+		__clear_page_locked(page);
 	return error;
 }
 
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 8eb7fa76c1d..e90a2cb0291 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -23,9 +23,9 @@ struct pagevec {
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
-void __pagevec_lru_add(struct pagevec *pvec);
-void __pagevec_lru_add_active(struct pagevec *pvec);
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
+void pagevec_swap_free(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -81,10 +81,36 @@ static inline void pagevec_free(struct pagevec *pvec)
 		__pagevec_free(pvec);
 }
 
-static inline void pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_file(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+}
+
+static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
+{
+	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+}
+
+static inline void pagevec_lru_add_file(struct pagevec *pvec)
+{
+	if (pagevec_count(pvec))
+		__pagevec_lru_add_file(pvec);
+}
+
+static inline void pagevec_lru_add_anon(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
+		__pagevec_lru_add_anon(pvec);
 }
 
 #endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 98dc6243a70..085187be29c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -214,6 +214,7 @@ struct pci_dev {
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
+	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
 	unsigned int	is_pcie:1;
 	pci_dev_flags_t dev_flags;
@@ -347,7 +348,6 @@ struct pci_bus_region {
 struct pci_dynids {
 	spinlock_t lock;            /* protects list, index */
 	struct list_head list;      /* for IDs added at runtime */
-	unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
 };
 
 /* ---------------------------------------------------------------- */
@@ -456,8 +456,8 @@ struct pci_driver {
 
 /**
  * PCI_VDEVICE - macro used to describe a specific pci device in short form
- * @vend: the vendor name
- * @dev: the 16 bit PCI Device ID
+ * @vendor: the vendor name
+ * @device: the 16 bit PCI Device ID
  *
  * This macro is used to create a struct pci_device_id that matches a
  * specific PCI device.  The subvendor, and subdevice fields will be set
@@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 
 /* ROM control related routines */
+int pci_enable_rom(struct pci_dev *pdev);
+void pci_disable_rom(struct pci_dev *pdev);
 void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
 size_t pci_get_rom_size(void __iomem *rom, size_t size);
@@ -643,6 +645,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
@@ -723,7 +726,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-	u16 	vector;	/* kernel uses to write allocated vector */
+	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
 };
 
@@ -1116,5 +1119,20 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+#ifdef CONFIG_HAS_IOMEM
+/* Map memory BAR @bar of @pdev with ioremap_nocache(); NULL if not memory. */
+static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	/* Only memory BARs can be ioremap()ed; warn and fail on an IO BAR. */
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+		WARN_ON(1);
+		return NULL;
+	}
+	return ioremap_nocache(pci_resource_start(pdev, bar),
+				     pci_resource_len(pdev, bar));
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 8edddc240e4..e5d344bfcb7 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2454,9 +2454,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_3	0x3a1a
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
-#define PCI_DEVICE_ID_INTEL_PCH_0	0x3b10
-#define PCI_DEVICE_ID_INTEL_PCH_1	0x3b11
-#define PCI_DEVICE_ID_INTEL_PCH_2	0x3b30
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN	0x3b00
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX	0x3b1f
+#define PCI_DEVICE_ID_INTEL_PCH_SMBUS	0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 450684f7eaa..eb6686b88f9 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -419,6 +419,10 @@
 #define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP		30	/* Root Capabilities */
 #define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+#define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
@@ -429,6 +433,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_ARI	14
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -536,5 +541,14 @@
 #define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
 #define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
 
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP		0x04	/* ARI Capability Register */
+#define  PCI_ARI_CAP_MFVC	0x0001	/* MFVC Function Groups Capability */
+#define  PCI_ARI_CAP_ACS	0x0002	/* ACS Function Groups Capability */
+#define  PCI_ARI_CAP_NFN(x)	(((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL		0x06	/* ARI Control Register */
+#define  PCI_ARI_CTRL_MFVC	0x0001	/* MFVC Function Groups Enable */
+#define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
+#define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
 #endif /* LINUX_PCI_REGS_H */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ad..a7c72135554 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,8 +45,6 @@ struct k_itimer {
 	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
 	int it_sigev_notify;		/* notify word of sigevent struct */
-	int it_sigev_signo;		/* signo word of sigevent struct */
-	sigval_t it_sigev_value;	/* value word of sigevent struct */
 	struct task_struct *it_process;	/* process to send signal to */
 	struct sigqueue *sigq;		/* signal queue entry. */
 	union {
@@ -115,4 +113,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
+void update_rlimit_cpu(unsigned long rlim_new);
+
 #endif
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index ea96ead1d39..f9348cba6dc 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -165,6 +165,12 @@ struct power_supply_info {
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 
+#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE)
+extern int power_supply_is_system_supplied(void);
+#else
+static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
+#endif
+
 extern int power_supply_register(struct device *parent,
 				 struct power_supply *psy);
 extern void power_supply_unregister(struct power_supply *psy);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ea7416c901d..22641d5d45d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
-extern void ptrace_untrace(struct task_struct *child);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
new file mode 100644
index 00000000000..536b0ca46a0
--- /dev/null
+++ b/include/linux/ring_buffer.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_RING_BUFFER_H
+#define _LINUX_RING_BUFFER_H
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+
+struct ring_buffer;
+struct ring_buffer_iter;
+
+/*
+ * Don't reference this struct directly, use functions below.
+ */
+struct ring_buffer_event {
+	u32		type:2, len:3, time_delta:27;
+	u32		array[];
+};
+
+/**
+ * enum ring_buffer_type - internal ring buffer types
+ *
+ * @RINGBUF_TYPE_PADDING:	Left over page padding
+ *				 array is ignored
+ *				 size is variable depending on how much
+ *				  padding is needed
+ *
+ * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
+ *				 array[0] = time delta (28 .. 59)
+ *				 size = 8 bytes
+ *
+ * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
+ *				 array[0] = tv_nsec
+ *				 array[1] = tv_sec
+ *				 size = 16 bytes
+ *
+ * @RINGBUF_TYPE_DATA:		Data record
+ *				 If len is zero:
+ *				  array[0] holds the actual length
+ *				  array[1..(length+3)/4-1] holds data
+ *				 else
+ *				  length = len << 2
+ *				  array[0..(length+3)/4] holds data
+ */
+enum ring_buffer_type {
+	RINGBUF_TYPE_PADDING,
+	RINGBUF_TYPE_TIME_EXTEND,
+	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
+	RINGBUF_TYPE_TIME_STAMP,
+	RINGBUF_TYPE_DATA,
+};
+
+unsigned ring_buffer_event_length(struct ring_buffer_event *event);
+void *ring_buffer_event_data(struct ring_buffer_event *event);
+
+/**
+ * ring_buffer_event_time_delta - return the delta timestamp of the event
+ * @event: the event to get the delta timestamp of
+ *
+ * The delta timestamp is the 27 bit timestamp since the last event.
+ */
+static inline unsigned
+ring_buffer_event_time_delta(struct ring_buffer_event *event)
+{
+	return event->time_delta;
+}
+
+/*
+ * size is in bytes for each per CPU buffer.
+ */
+struct ring_buffer *
+ring_buffer_alloc(unsigned long size, unsigned flags);
+void ring_buffer_free(struct ring_buffer *buffer);
+
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags);
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags);
+int ring_buffer_write(struct ring_buffer *buffer,
+		      unsigned long length, void *data);
+
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
+
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
+void ring_buffer_read_finish(struct ring_buffer_iter *iter);
+
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
+
+unsigned long ring_buffer_size(struct ring_buffer *buffer);
+
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_reset(struct ring_buffer *buffer);
+
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu);
+
+int ring_buffer_empty(struct ring_buffer *buffer);
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
+
+void ring_buffer_record_disable(struct ring_buffer *buffer);
+void ring_buffer_record_enable(struct ring_buffer *buffer);
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+
+unsigned long ring_buffer_entries(struct ring_buffer *buffer);
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+
+u64 ring_buffer_time_stamp(int cpu);
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+
+enum ring_buffer_flags {
+	RB_FL_OVERWRITE		= 1 << 0,
+};
+
+#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fed6f5e0b41..89f0564b10c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -39,18 +39,6 @@ struct anon_vma {
 
 #ifdef CONFIG_MMU
 
-extern struct kmem_cache *anon_vma_cachep;
-
-static inline struct anon_vma *anon_vma_alloc(void)
-{
-	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
-}
-
-static inline void anon_vma_free(struct anon_vma *anon_vma)
-{
-	kmem_cache_free(anon_vma_cachep, anon_vma);
-}
-
 static inline void anon_vma_lock(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
@@ -75,6 +63,9 @@ void anon_vma_unlink(struct vm_area_struct *);
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
+extern struct anon_vma *page_lock_anon_vma(struct page *page);
+extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
@@ -117,6 +108,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  */
 int page_mkclean(struct page *);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * called in munlock()/munmap() path to check for other vmas holding
+ * the page mlocked.
+ */
+int try_to_munlock(struct page *);
+#else
+static inline int try_to_munlock(struct page *page)
+{
+	return 0;	/* a.k.a. SWAP_SUCCESS */
+}
+#endif
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -140,5 +144,6 @@ static inline int page_mkclean(struct page *page)
 #define SWAP_SUCCESS	0
 #define SWAP_AGAIN	1
 #define SWAP_FAIL	2
+#define SWAP_MLOCK	3
 
 #endif	/* _LINUX_RMAP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c226c7b8294..5c38db536e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -403,12 +403,21 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_MAPPED_PRIVATE	4
 #define MMF_DUMP_MAPPED_SHARED	5
 #define MMF_DUMP_ELF_HEADERS	6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED  8
 #define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	5
+#define MMF_DUMP_FILTER_BITS	7
 #define MMF_DUMP_FILTER_MASK \
 	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \
-	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED))
+	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
+	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF	0
+#endif
 
 struct sighand_struct {
 	atomic_t		count;
@@ -425,6 +434,39 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:		time spent in user mode, in &cputime_t units
+ * @stime:		time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp	stime
+#define virt_exp	utime
+#define sched_exp	sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:		thread group interval timers; substructure for
+ *			uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+struct thread_group_cputime {
+	struct task_cputime *totals;
+};
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -470,6 +512,17 @@ struct signal_struct {
 	cputime_t it_prof_expires, it_virt_expires;
 	cputime_t it_prof_incr, it_virt_incr;
 
+	/*
+	 * Thread group totals for process CPU clocks.
+	 * See thread_group_cputime(), et al, for details.
+	 */
+	struct thread_group_cputime cputime;
+
+	/* Earliest-expiration cache. */
+	struct task_cputime cputime_expires;
+
+	struct list_head cpu_timers[3];
+
 	/* job control IDs */
 
 	/*
@@ -500,7 +553,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t utime, stime, cutime, cstime;
+	cputime_t cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -509,14 +562,6 @@ struct signal_struct {
 	struct task_io_accounting ioac;
 
 	/*
-	 * Cumulative ns of scheduled CPU time for dead threads in the
-	 * group, not including a zombie group leader.  (This only differs
-	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
-	 * other than jiffies.)
-	 */
-	unsigned long long sum_sched_runtime;
-
-	/*
 	 * We don't bother to synchronize most readers of this at all,
 	 * because there is no reader checking a limit that actually needs
 	 * to get both rlim_cur and rlim_max atomically, and either one
@@ -527,8 +572,6 @@ struct signal_struct {
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
 
-	struct list_head cpu_timers[3];
-
 	/* keep the process-shared keyrings here so that they do the right
 	 * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
@@ -1137,8 +1180,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-  	cputime_t it_prof_expires, it_virt_expires;
-	unsigned long long it_sched_expires;
+	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -1588,6 +1630,7 @@ extern unsigned long long cpu_clock(int cpu);
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2085,6 +2128,30 @@ static inline int spin_needbreak(spinlock_t *lock)
 }
 
 /*
+ * Thread group CPU time accounting.
+ */
+
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+{
+	if (curr->signal->cputime.totals)
+		return 0;
+	return thread_group_cputime_alloc(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+	free_percpu(sig->cputime.totals);
+}
+
+/*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
  * This is required every time the blocked sigset_t changes.
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a1783b229ef..dc50bcc282a 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
 	return seq_bitmap(m, mask->bits, MAX_NUMNODES);
 }
 
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+		unsigned int nr_bits);
+
+static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask)
+{
+	return seq_bitmap_list(m, mask->bits, NR_CPUS);
+}
+
+static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask)
+{
+	return seq_bitmap_list(m, mask->bits, MAX_NUMNODES);
+}
+
 int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
 int single_release(struct inode *, struct file *);
 void *__seq_open_private(struct file *, const struct seq_operations *, int);
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
new file mode 100644
index 00000000000..68e212ff9dd
--- /dev/null
+++ b/include/linux/sh_intc.h
@@ -0,0 +1,91 @@
+#ifndef __SH_INTC_H
+#define __SH_INTC_H
+
+typedef unsigned char intc_enum;
+
+struct intc_vect {
+	intc_enum enum_id;
+	unsigned short vect;
+};
+
+#define INTC_VECT(enum_id, vect) { enum_id, vect }
+#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq))
+
+struct intc_group {
+	intc_enum enum_id;
+	intc_enum enum_ids[32];
+};
+
+#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } }
+
+struct intc_mask_reg {
+	unsigned long set_reg, clr_reg, reg_width;
+	intc_enum enum_ids[32];
+#ifdef CONFIG_SMP
+	unsigned long smp;
+#endif
+};
+
+struct intc_prio_reg {
+	unsigned long set_reg, clr_reg, reg_width, field_width;
+	intc_enum enum_ids[16];
+#ifdef CONFIG_SMP
+	unsigned long smp;
+#endif
+};
+
+struct intc_sense_reg {
+	unsigned long reg, reg_width, field_width;
+	intc_enum enum_ids[16];
+};
+
+#ifdef CONFIG_SMP
+#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
+#else
+#define INTC_SMP(stride, nr)
+#endif
+
+struct intc_desc {
+	struct intc_vect *vectors;
+	unsigned int nr_vectors;
+	struct intc_group *groups;
+	unsigned int nr_groups;
+	struct intc_mask_reg *mask_regs;
+	unsigned int nr_mask_regs;
+	struct intc_prio_reg *prio_regs;
+	unsigned int nr_prio_regs;
+	struct intc_sense_reg *sense_regs;
+	unsigned int nr_sense_regs;
+	char *name;
+#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
+	struct intc_mask_reg *ack_regs;
+	unsigned int nr_ack_regs;
+#endif
+};
+
+#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
+#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups,		\
+	mask_regs, prio_regs, sense_regs)				\
+struct intc_desc symbol __initdata = {					\
+	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
+	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
+	_INTC_ARRAY(sense_regs),					\
+	chipname,							\
+}
+
+#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
+#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
+	mask_regs, prio_regs, sense_regs, ack_regs)			\
+struct intc_desc symbol __initdata = {					\
+	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
+	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
+	_INTC_ARRAY(sense_regs),					\
+	chipname,							\
+	_INTC_ARRAY(ack_regs),						\
+}
+#endif
+
+void __init register_intc_controller(struct intc_desc *desc);
+int intc_set_priority(unsigned int irq, unsigned int prio);
+
+#endif /* __SH_INTC_H */
diff --git a/include/linux/swab.h b/include/linux/swab.h
index 270d5c208a8..bbed279f3b3 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val)
 {
 #ifdef __arch_swab16
 	return __arch_swab16(val);
-#elif defined(__arch_swab16p)
-	return __arch_swab16p(&val);
 #else
 	return __const_swab16(val);
 #endif
@@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val)
 {
 #ifdef __arch_swab32
 	return __arch_swab32(val);
-#elif defined(__arch_swab32p)
-	return __arch_swab32p(&val);
 #else
 	return __const_swab32(val);
 #endif
@@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
 {
 #ifdef __arch_swab64
 	return __arch_swab64(val);
-#elif defined(__arch_swab64p)
-	return __arch_swab64p(&val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
 	__u32 l = val & ((1ULL << 32) - 1);
@@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val)
 {
 #ifdef __arch_swahw32
 	return __arch_swahw32(val);
-#elif defined(__arch_swahw32p)
-	return __arch_swahw32p(&val);
 #else
 	return __const_swahw32(val);
 #endif
@@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
 {
 #ifdef __arch_swahb32
 	return __arch_swahb32(val);
-#elif defined(__arch_swahb32p)
-	return __arch_swahb32p(&val);
 #else
 	return __const_swahb32(val);
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de40f169a4e..a3af95b2cb6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
 #include <linux/list.h>
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
+#include <linux/node.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>
@@ -171,8 +172,10 @@ extern unsigned int nr_free_pagecache_pages(void);
 
 
 /* linux/mm/swap.c */
-extern void lru_cache_add(struct page *);
-extern void lru_cache_add_active(struct page *);
+extern void __lru_cache_add(struct page *, enum lru_list lru);
+extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_cache_add_active_or_unevictable(struct page *,
+					struct vm_area_struct *);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
@@ -180,12 +183,38 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
+extern void add_page_to_unevictable_list(struct page *page);
+
+/**
+ * lru_cache_add_anon: add a page to the inactive anon LRU list
+ * @page: the page to add
+ */
+static inline void lru_cache_add_anon(struct page *page)
+{
+	__lru_cache_add(page, LRU_INACTIVE_ANON);
+}
+
+static inline void lru_cache_add_active_anon(struct page *page)
+{
+	__lru_cache_add(page, LRU_ACTIVE_ANON);
+}
+
+static inline void lru_cache_add_file(struct page *page)
+{
+	__lru_cache_add(page, LRU_INACTIVE_FILE);
+}
+
+static inline void lru_cache_add_active_file(struct page *page)
+{
+	__lru_cache_add(page, LRU_ACTIVE_FILE);
+}
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 							gfp_t gfp_mask);
-extern int __isolate_lru_page(struct page *page, int mode);
+extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
@@ -204,6 +233,34 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 }
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern void scan_mapping_unevictable_pages(struct address_space *);
+
+extern unsigned long scan_unevictable_pages;
+extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+					void __user *, size_t *, loff_t *);
+extern int scan_unevictable_register_node(struct node *node);
+extern void scan_unevictable_unregister_node(struct node *node);
+#else
+static inline int page_evictable(struct page *page,
+						struct vm_area_struct *vma)
+{
+	return 1;
+}
+
+static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
+
+static inline int scan_unevictable_register_node(struct node *node)
+{
+	return 0;
+}
+
+static inline void scan_unevictable_unregister_node(struct node *node) { }
+#endif
+
 extern int kswapd_run(int nid);
 
 #ifdef CONFIG_MMU
@@ -251,6 +308,7 @@ extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int remove_exclusive_swap_page_ref(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
@@ -339,6 +397,11 @@ static inline int remove_exclusive_swap_page(struct page *p)
 	return 0;
 }
 
+static inline int remove_exclusive_swap_page_ref(struct page *page)
+{
+	return 0;
+}
+
 static inline swp_entry_t get_swap_page(void)
 {
 	swp_entry_t entry;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b330e289d71..9d68fed50f1 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -21,8 +21,9 @@ struct kobject;
 struct module;
 
 /* FIXME
- * The *owner field is no longer used, but leave around
- * until the tree gets cleaned up fully.
+ * The *owner field is no longer used.
+ * x86 tree has been cleaned up. The owner
+ * attribute is still left for other arches.
  */
 struct attribute {
 	const char		*name;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 98921a3e1aa..b6ec8189ac0 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
+extern void tick_check_idle(int cpu);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 # endif
 
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -106,26 +108,23 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
 extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
-extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
-extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
-static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
 	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
 
 	return len;
 }
-static inline void tick_nohz_stop_idle(int cpu) { }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
diff --git a/include/linux/time.h b/include/linux/time.h
index 51e883df0fa..4f1c9db5770 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -119,6 +119,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
+extern void getrawmonotonic(struct timespec *ts);
 extern void getboottime(struct timespec *ts);
 extern void monotonic_to_bootbased(struct timespec *ts);
 
@@ -127,6 +128,9 @@ extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
+struct tms;
+extern void do_sys_times(struct tms *);
+
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
@@ -216,6 +220,7 @@ struct itimerval {
 #define CLOCK_MONOTONIC			1
 #define CLOCK_PROCESS_CPUTIME_ID	2
 #define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
 
 /*
  * The IDs of various hardware clocks:
diff --git a/include/linux/timex.h b/include/linux/timex.h
index fc6035d29d5..9007313b5b7 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -82,7 +82,7 @@
  */
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
 #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 20
+#define PPM_SCALE_INV_SHIFT 19
 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
 		       PPM_SCALE + 1)
 
@@ -141,8 +141,15 @@ struct timex {
 #define ADJ_MICRO		0x1000	/* select microsecond resolution */
 #define ADJ_NANO		0x2000	/* select nanosecond resolution */
 #define ADJ_TICK		0x4000	/* tick value */
+
+#ifdef __KERNEL__
+#define ADJ_ADJTIME		0x8000	/* switch between adjtime/adjtimex modes */
+#define ADJ_OFFSET_SINGLESHOT	0x0001	/* old-fashioned adjtime */
+#define ADJ_OFFSET_READONLY	0x2000	/* read-only adjtime */
+#else
 #define ADJ_OFFSET_SINGLESHOT	0x8001	/* old-fashioned adjtime */
-#define ADJ_OFFSET_SS_READ	0xa001  /* read-only adjtime */
+#define ADJ_OFFSET_SS_READ	0xa001	/* read-only adjtime */
+#endif
 
 /* xntp 3.4 compatibility names */
 #define MOD_OFFSET	ADJ_OFFSET
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644
index 00000000000..c5bb39c7a77
--- /dev/null
+++ b/include/linux/tracepoint.h
@@ -0,0 +1,137 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+	const char *name;		/* Tracepoint name */
+	int state;			/* State. */
+	void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)	args
+#define TPARGS(args...)		args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
+ */
+#define __DO_TRACE(tp, proto, args)					\
+	do {								\
+		void **it_func;						\
+									\
+		rcu_read_lock_sched();					\
+		it_func = rcu_dereference((tp)->funcs);			\
+		if (it_func) {						\
+			do {						\
+				((void(*)(proto))(*it_func))(args);	\
+			} while (*(++it_func));				\
+		}							\
+		rcu_read_unlock_sched();				\
+	} while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)					\
+	static inline void trace_##name(proto)				\
+	{								\
+		static const char __tpstrtab_##name[]			\
+		__attribute__((section("__tracepoints_strings")))	\
+		= #name ":" #proto;					\
+		static struct tracepoint __tracepoint_##name		\
+		__attribute__((section("__tracepoints"), aligned(8))) =	\
+		{ __tpstrtab_##name, 0, NULL };				\
+		if (unlikely(__tracepoint_##name.state))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TPPROTO(proto), TPARGS(args));		\
+	}								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return tracepoint_probe_register(#name ":" #proto,	\
+			(void *)probe);					\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{								\
+		tracepoint_probe_unregister(#name ":" #proto,		\
+			(void *)probe);					\
+	}
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)			\
+	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+	{ }								\
+	static inline void trace_##name(proto)				\
+	{ }								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return -ENOSYS;						\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{ }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+	struct module *module;
+	struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end);
+
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+static inline void tracepoint_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
+
+#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 328eb402272..4c28c4d564e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,7 @@
 #define _LINUX_VMALLOC_H
 
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/page.h>		/* pgprot_t */
 
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
@@ -23,7 +24,6 @@ struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 #endif
 
 struct vm_struct {
-	/* keep next,addr,size together to speedup lookups */
 	struct vm_struct	*next;
 	void			*addr;
 	unsigned long		size;
@@ -37,6 +37,19 @@ struct vm_struct {
 /*
  *	Highlevel APIs for driver use
  */
+extern void vm_unmap_ram(const void *mem, unsigned int count);
+extern void *vm_map_ram(struct page **pages, unsigned int count,
+				int node, pgprot_t prot);
+extern void vm_unmap_aliases(void);
+
+#ifdef CONFIG_MMU
+extern void __init vmalloc_init(void);
+#else
+static inline void vmalloc_init(void)
+{
+}
+#endif
+
 extern void *vmalloc(unsigned long size);
 extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d43951..9cd3ab0f554 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,6 +41,16 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+		UNEVICTABLE_PGCULLED,	/* culled to unevictable list */
+		UNEVICTABLE_PGSCANNED,	/* scanned for evictability */
+		UNEVICTABLE_PGRESCUED,	/* rescued from unevictable list */
+		UNEVICTABLE_PGMLOCKED,
+		UNEVICTABLE_PGMUNLOCKED,
+		UNEVICTABLE_PGCLEARED,	/* on COW, page truncate */
+		UNEVICTABLE_PGSTRANDED,	/* unable to isolate on unlock */
+		UNEVICTABLE_MLOCKFREED,
+#endif
 		NR_VM_EVENT_ITEMS
 };
 
@@ -159,6 +169,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
 	return x;
 }
 
+extern unsigned long global_lru_pages(void);
+
+static inline unsigned long zone_lru_pages(struct zone *zone)
+{
+	return (zone_page_state(zone, NR_ACTIVE_ANON)
+		+ zone_page_state(zone, NR_ACTIVE_FILE)
+		+ zone_page_state(zone, NR_INACTIVE_ANON)
+		+ zone_page_state(zone, NR_INACTIVE_FILE));
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index fb163e2e0de..d2c60c73619 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -27,8 +27,6 @@
 #ifndef NET_9P_H
 #define NET_9P_H
 
-#ifdef CONFIG_NET_9P_DEBUG
-
 /**
  * enum p9_debug_flags - bits for mount time debug parameter
  * @P9_DEBUG_ERROR: more verbose error messages including original error string
@@ -39,6 +37,7 @@
  * @P9_DEBUG_TRANS: transport tracing
  * @P9_DEBUG_SLABS: memory management tracing
  * @P9_DEBUG_FCALL: verbose dump of protocol messages
+ * @P9_DEBUG_FID: fid allocation/deallocation tracking
  *
  * These flags are passed at mount time to turn on various levels of
  * verbosity and tracing which will be output to the system logs.
@@ -53,24 +52,27 @@ enum p9_debug_flags {
 	P9_DEBUG_TRANS =	(1<<6),
 	P9_DEBUG_SLABS =      	(1<<7),
 	P9_DEBUG_FCALL =	(1<<8),
+	P9_DEBUG_FID =		(1<<9),
+	P9_DEBUG_PKT =		(1<<10),
 };
 
 extern unsigned int p9_debug_level;
 
+#ifdef CONFIG_NET_9P_DEBUG
 #define P9_DPRINTK(level, format, arg...) \
 do {  \
-	if ((p9_debug_level & level) == level) \
-		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __func__, task_pid_nr(current) , ## arg); \
+	if ((p9_debug_level & (level)) == (level)) { /* every bit of level enabled */ \
+		if ((level) == P9_DEBUG_9P) /* P9_DEBUG_9P: pid-only prefix */ \
+			printk(KERN_NOTICE "(%8.8d) " \
+			format , task_pid_nr(current) , ## arg); \
+		else /* any other level: function name and pid prefix */ \
+			printk(KERN_NOTICE "-- %s (%d): " \
+			format , __func__, task_pid_nr(current) , ## arg); \
+	} \
 } while (0)
 
-#define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
-	"%s: %.*s\n", s, fcall?fcall->params.rerror.error.len:0, \
-	fcall?fcall->params.rerror.error.str:"");
-
 #else
 #define P9_DPRINTK(level, format, arg...)  do { } while (0)
-#define PRINT_FCALL_ERROR(s, fcall) do { } while (0)
 #endif
 
 #define P9_EPRINTK(level, format, arg...) \
@@ -325,33 +327,6 @@ struct p9_qid {
  * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
  */
 
-struct p9_stat {
-	u16 size;
-	u16 type;
-	u32 dev;
-	struct p9_qid qid;
-	u32 mode;
-	u32 atime;
-	u32 mtime;
-	u64 length;
-	struct p9_str name;
-	struct p9_str uid;
-	struct p9_str gid;
-	struct p9_str muid;
-	struct p9_str extension;	/* 9p2000.u extensions */
-	u32 n_uid;			/* 9p2000.u extensions */
-	u32 n_gid;			/* 9p2000.u extensions */
-	u32 n_muid;			/* 9p2000.u extensions */
-};
-
-/*
- * file metadata (stat) structure used to create Twstat message
- * The is identical to &p9_stat, but the strings don't point to
- * the same memory block and should be freed separately
- *
- * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
- */
-
 struct p9_wstat {
 	u16 size;
 	u16 type;
@@ -493,12 +468,12 @@ struct p9_tstat {
 };
 
 struct p9_rstat {
-	struct p9_stat stat;
+	struct p9_wstat stat;
 };
 
 struct p9_twstat {
 	u32 fid;
-	struct p9_stat stat;
+	struct p9_wstat stat;
 };
 
 struct p9_rwstat {
@@ -509,8 +484,9 @@ struct p9_rwstat {
  * @size: prefixed length of the structure
  * @id: protocol operating identifier of type &p9_msg_t
  * @tag: transaction id of the request
+ * @offset: used by marshalling routines to track current position in buffer
+ * @capacity: used by marshalling routines to track total capacity
  * @sdata: payload
- * @params: per-operation parameters
  *
  * &p9_fcall represents the structure for all 9P RPC
  * transactions.  Requests are packaged into fcalls, and reponses
@@ -523,68 +499,15 @@ struct p9_fcall {
 	u32 size;
 	u8 id;
 	u16 tag;
-	void *sdata;
-
-	union {
-		struct p9_tversion tversion;
-		struct p9_rversion rversion;
-		struct p9_tauth tauth;
-		struct p9_rauth rauth;
-		struct p9_rerror rerror;
-		struct p9_tflush tflush;
-		struct p9_rflush rflush;
-		struct p9_tattach tattach;
-		struct p9_rattach rattach;
-		struct p9_twalk twalk;
-		struct p9_rwalk rwalk;
-		struct p9_topen topen;
-		struct p9_ropen ropen;
-		struct p9_tcreate tcreate;
-		struct p9_rcreate rcreate;
-		struct p9_tread tread;
-		struct p9_rread rread;
-		struct p9_twrite twrite;
-		struct p9_rwrite rwrite;
-		struct p9_tclunk tclunk;
-		struct p9_rclunk rclunk;
-		struct p9_tremove tremove;
-		struct p9_rremove rremove;
-		struct p9_tstat tstat;
-		struct p9_rstat rstat;
-		struct p9_twstat twstat;
-		struct p9_rwstat rwstat;
-	} params;
+
+	size_t offset;
+	size_t capacity;
+
+	uint8_t *sdata;
 };
 
 struct p9_idpool;
 
-int p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat,
-	int dotu);
-int p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *fc, int dotu);
-void p9_set_tag(struct p9_fcall *fc, u16 tag);
-struct p9_fcall *p9_create_tversion(u32 msize, char *version);
-struct p9_fcall *p9_create_tattach(u32 fid, u32 afid, char *uname,
-	char *aname, u32 n_uname, int dotu);
-struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu);
-struct p9_fcall *p9_create_tflush(u16 oldtag);
-struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
-	char **wnames);
-struct p9_fcall *p9_create_topen(u32 fid, u8 mode);
-struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int dotu);
-struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count);
-struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
-	const char *data);
-struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
-	const char __user *data);
-struct p9_fcall *p9_create_tclunk(u32 fid);
-struct p9_fcall *p9_create_tremove(u32 fid);
-struct p9_fcall *p9_create_tstat(u32 fid);
-struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
-	int dotu);
-
-int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int dotu);
 int p9_errstr2errno(char *errstr, int len);
 
 struct p9_idpool *p9_idpool_create(void);
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c936dd14de4..1f17f3d9372 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -26,6 +26,87 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+/* Number of requests per row */
+#define P9_ROW_MAXTAG 255
+
+/**
+ * enum p9_trans_status - different states of underlying transports
+ * @Connected: transport is connected and healthy
+ * @Disconnected: transport has been disconnected
+ * @Hung: transport is connected but wedged
+ *
+ * This enumeration details the various states a transport
+ * instantiation can be in.
+ */
+
+enum p9_trans_status {
+	Connected,
+	Disconnected,
+	Hung,
+};
+
+/**
+ * enum p9_req_status_t - virtio request status
+ * @REQ_STATUS_IDLE: request slot unused
+ * @REQ_STATUS_ALLOC: request has been allocated but not sent
+ * @REQ_STATUS_UNSENT: request waiting to be sent
+ * @REQ_STATUS_SENT: request sent to server
+ * @REQ_STATUS_FLSH: a flush has been sent for this request
+ * @REQ_STATUS_RCVD: response received from server
+ * @REQ_STATUS_FLSHD: request has been flushed
+ * @REQ_STATUS_ERROR: request encountered an error on the client side
+ *
+ * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
+ * but use is actually tracked by the idpool structure which handles tag
+ * id allocation.
+ *
+ */
+
+enum p9_req_status_t {
+	REQ_STATUS_IDLE,
+	REQ_STATUS_ALLOC,
+	REQ_STATUS_UNSENT,
+	REQ_STATUS_SENT,
+	REQ_STATUS_FLSH,
+	REQ_STATUS_RCVD,
+	REQ_STATUS_FLSHD,
+	REQ_STATUS_ERROR,
+};
+
+/**
+ * struct p9_req_t - request slots
+ * @status: status of this request slot
+ * @t_err: transport error
+ * @flush_tag: tag of request being flushed (for flush requests)
+ * @wq: wait_queue for the client to block on for this request
+ * @tc: the request fcall structure
+ * @rc: the response fcall structure
+ * @aux: transport specific data (provided for trans_fd migration)
+ * @req_list: link for higher level objects to chain requests
+ *
+ * Transports use an array to track outstanding requests
+ * instead of a list.  While this may incur overhead during initial
+ * allocation or expansion, it makes request lookup much easier as the
+ * tag id is an index into an array.  (We use tag+1 so that we can accommodate
+ * the -1 tag for the T_VERSION request).
+ * This also has the nice effect of only having to allocate wait_queues
+ * once, instead of constantly allocating and freeing them.  It's possible
+ * other resources could benefit from this scheme as well.
+ *
+ */
+
+struct p9_req_t {
+	int status;
+	int t_err;
+	u16 flush_tag;
+	wait_queue_head_t *wq;
+	struct p9_fcall *tc;
+	struct p9_fcall *rc;
+	void *aux;
+
+	struct list_head req_list;
+};
+
 /**
  * struct p9_client - per client instance state
  * @lock: protect @fidlist
@@ -36,9 +117,20 @@
  * @conn: connection state information used by trans_fd
  * @fidpool: fid handle accounting for session
  * @fidlist: List of active fid handles
+ * @tagpool: transaction id accounting for session
+ * @reqs: 2D array of requests
+ * @max_tag: current maximum tag id allocated
  *
  * The client structure is used to keep track of various per-client
  * state that has been instantiated.
+ * In order to minimize per-transaction overhead we use a
+ * simple array to lookup requests instead of a hash table
+ * or linked list.  In order to support larger number of
+ * transactions, we make this a 2D array, allocating new rows
+ * when we need to grow the total number of the transactions.
+ *
+ * Each row is P9_ROW_MAXTAG (255) requests and we'll support up to that many
+ * rows, for a total of roughly 64k concurrent requests per session.
  *
  * Bugs: duplicated data and potentially unnecessary elements.
  */
@@ -48,11 +140,16 @@ struct p9_client {
 	int msize;
 	unsigned char dotu;
 	struct p9_trans_module *trans_mod;
-	struct p9_trans *trans;
+	enum p9_trans_status status;
+	void *trans;
 	struct p9_conn *conn;
 
 	struct p9_idpool *fidpool;
 	struct list_head fidlist;
+
+	struct p9_idpool *tagpool;
+	struct p9_req_t *reqs[P9_ROW_MAXTAG];
+	int max_tag;
 };
 
 /**
@@ -65,8 +162,6 @@ struct p9_client {
  * @uid: the numeric uid of the local user who owns this handle
  * @aux: transport specific information (unused?)
  * @rdir_fpos: tracks offset of file position when reading directory contents
- * @rdir_pos: (unused?)
- * @rdir_fcall: holds response of last directory read request
  * @flist: per-client-instance fid tracking
  * @dlist: per-dentry fid tracking
  *
@@ -83,8 +178,6 @@ struct p9_fid {
 	void *aux;
 
 	int rdir_fpos;
-	int rdir_pos;
-	struct p9_fcall *rdir_fcall;
 	struct list_head flist;
 	struct list_head dlist;	/* list of all fids attached to a dentry */
 };
@@ -103,15 +196,18 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 							char *extension);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
-int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count);
-int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count);
-int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count);
-int p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset,
-								u32 count);
-int p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
-								u32 count);
-struct p9_stat *p9_client_stat(struct p9_fid *fid);
+int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
+							u64 offset, u32 count);
+int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
+							u64 offset, u32 count);
+struct p9_wstat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
-struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset);
+
+struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
+
+int p9stat_read(char *, int, struct p9_wstat *, int);
+void p9stat_free(struct p9_wstat *);
+
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 3ca737120a9..6d5886efb10 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,52 +26,6 @@
 #ifndef NET_9P_TRANSPORT_H
 #define NET_9P_TRANSPORT_H
 
-#include <linux/module.h>
-
-/**
- * enum p9_trans_status - different states of underlying transports
- * @Connected: transport is connected and healthy
- * @Disconnected: transport has been disconnected
- * @Hung: transport is connected by wedged
- *
- * This enumeration details the various states a transport
- * instatiation can be in.
- */
-
-enum p9_trans_status {
-	Connected,
-	Disconnected,
-	Hung,
-};
-
-/**
- * struct p9_trans - per-transport state and API
- * @status: transport &p9_trans_status
- * @msize: negotiated maximum packet size (duplicate from client)
- * @extended: negotiated protocol extensions (duplicate from client)
- * @priv: transport private data
- * @close: member function to disconnect and close the transport
- * @rpc: member function to issue a request to the transport
- *
- * This is the basic API for a transport instance.  It is used as
- * a handle by the client to issue requests.  This interface is currently
- * in flux during reorganization.
- *
- * Bugs: there is lots of duplicated data here and its not clear that
- * the member functions need to be per-instance versus per transport
- * module.
- */
-
-struct p9_trans {
-	enum p9_trans_status status;
-	int msize;
-	unsigned char extended;
-	void *priv;
-	void (*close) (struct p9_trans *);
-	int (*rpc) (struct p9_trans *t, struct p9_fcall *tc,
-							struct p9_fcall **rc);
-};
-
 /**
  * struct p9_trans_module - transport module interface
  * @list: used to maintain a list of currently available transports
@@ -79,12 +33,14 @@ struct p9_trans {
  * @maxsize: transport provided maximum packet size
  * @def: set if this transport should be considered the default
  * @create: member function to create a new connection on this transport
+ * @request: member function to issue a request to the transport
+ * @cancel: member function to cancel a request (if it hasn't been sent)
  *
  * This is the basic API for a transport module which is registered by the
  * transport module with the 9P core network module and used by the client
  * to instantiate a new connection on a transport.
  *
- * Bugs: the transport module list isn't protected.
+ * BUGS: the transport module list isn't protected.
  */
 
 struct p9_trans_module {
@@ -92,8 +48,11 @@ struct p9_trans_module {
 	char *name;		/* name of transport */
 	int maxsize;		/* max message size of transport */
 	int def;		/* this transport should be default */
-	struct p9_trans * (*create)(const char *, char *, int, unsigned char);
 	struct module *owner;
+	int (*create)(struct p9_client *, const char *, char *);
+	void (*close) (struct p9_client *);
+	int (*request) (struct p9_client *, struct p9_req_t *req);
+	int (*cancel) (struct p9_client *, struct p9_req_t *req);
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 0cb63ed2c1f..b8093971ccb 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -2,9 +2,9 @@
 #define __NETNS_X_TABLES_H
 
 #include <linux/list.h>
-#include <linux/net.h>
+#include <linux/netfilter.h>
 
 struct netns_xt {
-	struct list_head tables[NPROTO];
+	struct list_head tables[NFPROTO_NUMPROTO];
 };
 #endif
diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 00000000000..ad47369d01b
--- /dev/null
+++ b/include/trace/sched.h
@@ -0,0 +1,56 @@
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+	TPPROTO(struct task_struct *t),
+		TPARGS(t));
+
+DEFINE_TRACE(sched_kthread_stop_ret,
+	TPPROTO(int ret),
+		TPARGS(ret));
+
+DEFINE_TRACE(sched_wait_task,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+		TPARGS(rq, p));
+
+DEFINE_TRACE(sched_wakeup,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+		TPARGS(rq, p));
+
+DEFINE_TRACE(sched_wakeup_new,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+		TPARGS(rq, p));
+
+DEFINE_TRACE(sched_switch,
+	TPPROTO(struct rq *rq, struct task_struct *prev,
+		struct task_struct *next),
+		TPARGS(rq, prev, next));
+
+DEFINE_TRACE(sched_migrate_task,
+	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+		TPARGS(rq, p, dest_cpu));
+
+DEFINE_TRACE(sched_process_free,
+	TPPROTO(struct task_struct *p),
+		TPARGS(p));
+
+DEFINE_TRACE(sched_process_exit,
+	TPPROTO(struct task_struct *p),
+		TPARGS(p));
+
+DEFINE_TRACE(sched_process_wait,
+	TPPROTO(struct pid *pid),
+		TPARGS(pid));
+
+DEFINE_TRACE(sched_process_fork,
+	TPPROTO(struct task_struct *parent, struct task_struct *child),
+		TPARGS(parent, child));
+
+DEFINE_TRACE(sched_signal_send,
+	TPPROTO(int sig, struct task_struct *p),
+		TPARGS(sig, p));
+
+#endif
diff --git a/arch/sh/include/asm/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
index 130102f663f..1a4bc6ada60 100644
--- a/arch/sh/include/asm/sh_mobile_lcdc.h
+++ b/include/video/sh_mobile_lcdc.h
@@ -28,6 +28,12 @@ enum { LCDC_CHAN_DISABLED = 0,
 
 enum { LCDC_CLK_BUS, LCDC_CLK_PERIPHERAL, LCDC_CLK_EXTERNAL };
 
+#define LCDC_FLAGS_DWPOL (1 << 0) /* Rising edge dot clock data latch */
+#define LCDC_FLAGS_DIPOL (1 << 1) /* Active low display enable polarity */
+#define LCDC_FLAGS_DAPOL (1 << 2) /* Active low display data polarity */
+#define LCDC_FLAGS_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */
+#define LCDC_FLAGS_DWCNT (1 << 4) /* Disable dotclock during blanking */
+
 struct sh_mobile_lcdc_sys_bus_cfg {
 	unsigned long ldmt2r;
 	unsigned long ldmt3r;
@@ -57,6 +63,7 @@ struct sh_mobile_lcdc_chan_cfg {
 	int bpp;
 	int interface_type; /* selects RGBn or SYSn I/F, see above */
 	int clock_divider;
+	unsigned long flags; /* LCDC_FLAGS_... */
 	struct fb_videomode lcd_cfg;
 	struct sh_mobile_lcdc_lcd_size_cfg lcd_size_cfg;
 	struct sh_mobile_lcdc_board_cfg board_cfg;
@@ -64,7 +71,6 @@ struct sh_mobile_lcdc_chan_cfg {
 };
 
 struct sh_mobile_lcdc_info {
-	unsigned long lddckr;
 	int clock_source;
 	struct sh_mobile_lcdc_chan_cfg ch[2];
 };
diff --git a/init/Kconfig b/init/Kconfig
index 5ceff3249a2..113c74c07da 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -299,6 +299,13 @@ config CGROUP_NS
           for instance virtual servers and checkpoint/restart
           jobs.
 
+config CGROUP_FREEZER
+        bool "control group freezer subsystem"
+        depends on CGROUPS
+        help
+          Provides a way to freeze and unfreeze all tasks in a
+	  cgroup.
+
 config CGROUP_DEVICE
 	bool "Device controller for cgroups"
 	depends on CGROUPS && EXPERIMENTAL
@@ -730,6 +737,14 @@ config VM_EVENT_COUNTERS
 	  on EMBEDDED systems.  /proc/vmstat will only show page counts
 	  if VM event counters are disabled.
 
+config PCI_QUIRKS
+	default y
+	bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+	help
+	  This enables workarounds for various PCI chipset
+          bugs/quirks. Disable this only if your target machine is
+          unaffected by PCI quirks.
+
 config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EMBEDDED
@@ -779,6 +794,13 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
+config TRACEPOINTS
+	bool
+
 config MARKERS
 	bool "Activate markers"
 	help
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e..3e17a3bafe6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -27,6 +27,7 @@
 #include <linux/gfp.h>
 #include <linux/percpu.h>
 #include <linux/kmod.h>
+#include <linux/vmalloc.h>
 #include <linux/kernel_stat.h>
 #include <linux/start_kernel.h>
 #include <linux/security.h>
@@ -60,6 +61,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/ftrace.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -642,6 +644,7 @@ asmlinkage void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
+	vmalloc_init();
 	vfs_caches_init_early();
 	cpuset_init_early();
 	mem_init();
@@ -687,6 +690,8 @@ asmlinkage void __init start_kernel(void)
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+	ftrace_init();
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -703,30 +708,31 @@ __setup("initcall_debug", initcall_debug_setup);
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	ktime_t t0, t1, delta;
+	ktime_t delta;
 	char msgbuf[64];
-	int result;
+	struct boot_trace it;
 
 	if (initcall_debug) {
-		printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
-		t0 = ktime_get();
+		it.caller = task_pid_nr(current);
+		printk("calling  %pF @ %i\n", fn, it.caller);
+		it.calltime = ktime_get();
 	}
 
-	result = fn();
+	it.result = fn();
 
 	if (initcall_debug) {
-		t1 = ktime_get();
-		delta = ktime_sub(t1, t0);
-
-		printk("initcall %pF returned %d after %Ld msecs\n",
-			fn, result,
-			(unsigned long long) delta.tv64 >> 20);
+		it.rettime = ktime_get();
+		delta = ktime_sub(it.rettime, it.calltime);
+		it.duration = (unsigned long long) delta.tv64 >> 10;
+		printk("initcall %pF returned %d after %Ld usecs\n", fn,
+			it.result, it.duration);
+		trace_boot(&it, fn);
 	}
 
 	msgbuf[0] = 0;
 
-	if (result && result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", result);
+	if (it.result && it.result != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", it.result);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -740,7 +746,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return result;
+	return it.result;
 }
 
 
@@ -855,6 +861,7 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	start_boot_trace();
 
 	smp_init();
 	sched_init_smp();
@@ -881,6 +888,7 @@ static int __init kernel_init(void * unused)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+	stop_boot_trace();
 	init_post();
 	return 0;
 }
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 96fb36cd987..68eb857cfde 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -52,6 +52,14 @@
 #define HARD_MSGMAX 	(131072/sizeof(void*))
 #define DFLT_MSGSIZEMAX 8192	/* max message size */
 
+/*
+ * Define the ranges various user-specified maximum values can
+ * be set to.
+ */
+#define MIN_MSGMAX	1		/* min value for msg_max */
+#define MAX_MSGMAX	HARD_MSGMAX	/* max value for msg_max */
+#define MIN_MSGSIZEMAX	128		/* min value for msgsize_max */
+#define MAX_MSGSIZEMAX	(8192*128)	/* max value for msgsize_max */
 
 struct ext_wait_queue {		/* queue of sleeping tasks */
 	struct task_struct *task;
@@ -134,8 +142,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 			info->qsize = 0;
 			info->user = NULL;	/* set when all is ok */
 			memset(&info->attr, 0, sizeof(info->attr));
-			info->attr.mq_maxmsg = DFLT_MSGMAX;
-			info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
+			info->attr.mq_maxmsg = msg_max;
+			info->attr.mq_msgsize = msgsize_max;
 			if (attr) {
 				info->attr.mq_maxmsg = attr->mq_maxmsg;
 				info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1191,11 +1199,11 @@ static struct file_system_type mqueue_fs_type = {
 	.kill_sb = kill_litter_super,
 };
 
-static int msg_max_limit_min = DFLT_MSGMAX;
-static int msg_max_limit_max = HARD_MSGMAX;
+static int msg_max_limit_min = MIN_MSGMAX;
+static int msg_max_limit_max = MAX_MSGMAX;
 
-static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
-static int msg_maxsize_limit_max = INT_MAX;
+static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
+static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
 
 static ctl_table mq_sysctls[] = {
 	{
diff --git a/ipc/shm.c b/ipc/shm.c
index e77ec698cf4..0add3fa5f54 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
+		struct file *uninitialized_var(shm_file);
+
+		lru_add_drain_all();  /* drain pagevecs to lru lists */
+
 		shp = shm_lock_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 00000000000..a3bb4cb5253
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
+config FREEZER
+	def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e..305f11dbef2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
@@ -83,6 +85,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d..046c1609606 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
-
-	write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 	unlink_css_set(cg);
+	write_unlock(&css_set_lock);
 
 	rcu_read_lock();
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
 	.remount_fs = cgroup_remount,
 };
 
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+	init_rwsem(&cgrp->pids_mutex);
+}
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 	root->number_of_cgroups = 1;
 	cgrp->root = root;
 	cgrp->top_cgroup = cgrp;
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 }
 
 static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  * but we cannot guarantee that the information we produce is correct
  * unless we produce it entirely atomically.
  *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
  */
-struct ctr_struct {
-	char *buf;
-	int bufsz;
-};
 
 /*
  * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
 	return *(pid_t *)a - *(pid_t *)b;
 }
 
+
 /*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
  */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 {
-	int cnt = 0;
-	int i;
+	/*
+	 * Initially we receive a position value that corresponds to
+	 * one more than the last pid shown (or 0 on the first call or
+	 * after a seek to the start). Use a binary-search to find the
+	 * next pid to display, if any
+	 */
+	struct cgroup *cgrp = s->private;
+	int index = 0, pid = *pos;
+	int *iter;
 
-	for (i = 0; i < npids; i++)
-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-	return cnt;
+	down_read(&cgrp->pids_mutex);
+	if (pid) {
+		int end = cgrp->pids_length;
+		int i;
+		while (index < end) {
+			int mid = (index + end) / 2;
+			if (cgrp->tasks_pids[mid] == pid) {
+				index = mid;
+				break;
+			} else if (cgrp->tasks_pids[mid] <= pid)
+				index = mid + 1;
+			else
+				end = mid;
+		}
+	}
+	/* If we're off the end of the array, we're done */
+	if (index >= cgrp->pids_length)
+		return NULL;
+	/* Update the abstract position to be the actual pid that we found */
+	iter = cgrp->tasks_pids + index;
+	*pos = *iter;
+	return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+	struct cgroup *cgrp = s->private;
+	up_read(&cgrp->pids_mutex);
 }
 
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct cgroup *cgrp = s->private;
+	int *p = v;
+	int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+	/*
+	 * Advance to the next pid in the array. If this goes off the
+	 * end, we're done
+	 */
+	p++;
+	if (p >= end) {
+		return NULL;
+	} else {
+		*pos = *p;
+		return p;
+	}
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+	return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+	.start = cgroup_tasks_start,
+	.stop = cgroup_tasks_stop,
+	.next = cgroup_tasks_next,
+	.show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+	down_write(&cgrp->pids_mutex);
+	BUG_ON(!cgrp->pids_use_count);
+	if (!--cgrp->pids_use_count) {
+		kfree(cgrp->tasks_pids);
+		cgrp->tasks_pids = NULL;
+		cgrp->pids_length = 0;
+	}
+	up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	release_cgroup_pid_array(cgrp);
+	return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = cgroup_file_write,
+	.release = cgroup_tasks_release,
+};
+
 /*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * Handle an open on 'tasks' file.  Prepare an array containing the
  * process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
  */
+
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
 	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-	struct ctr_struct *ctr;
 	pid_t *pidarray;
 	int npids;
-	char c;
+	int retval;
 
+	/* Nothing to do for write-only files */
 	if (!(file->f_mode & FMODE_READ))
 		return 0;
 
-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-	if (!ctr)
-		goto err0;
-
 	/*
 	 * If cgroup gets more users after we read count, we won't have
 	 * enough space - tough.  This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
 	 * show up until sometime later on.
 	 */
 	npids = cgroup_task_count(cgrp);
-	if (npids) {
-		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-		if (!pidarray)
-			goto err1;
-
-		npids = pid_array_load(pidarray, npids, cgrp);
-		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-		/* Call pid_array_to_buf() twice, first just to get bufsz */
-		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-		if (!ctr->buf)
-			goto err2;
-		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-		kfree(pidarray);
-	} else {
-		ctr->buf = NULL;
-		ctr->bufsz = 0;
-	}
-	file->private_data = ctr;
-	return 0;
-
-err2:
-	kfree(pidarray);
-err1:
-	kfree(ctr);
-err0:
-	return -ENOMEM;
-}
+	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+	if (!pidarray)
+		return -ENOMEM;
+	npids = pid_array_load(pidarray, npids, cgrp);
+	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
 
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
-				    struct cftype *cft,
-				    struct file *file, char __user *buf,
-				    size_t nbytes, loff_t *ppos)
-{
-	struct ctr_struct *ctr = file->private_data;
+	/*
+	 * Store the array in the cgroup, freeing the old
+	 * array if necessary
+	 */
+	down_write(&cgrp->pids_mutex);
+	kfree(cgrp->tasks_pids);
+	cgrp->tasks_pids = pidarray;
+	cgrp->pids_length = npids;
+	cgrp->pids_use_count++;
+	up_write(&cgrp->pids_mutex);
 
-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+	file->f_op = &cgroup_tasks_operations;
 
-static int cgroup_tasks_release(struct inode *unused_inode,
-					struct file *file)
-{
-	struct ctr_struct *ctr;
-
-	if (file->f_mode & FMODE_READ) {
-		ctr = file->private_data;
-		kfree(ctr->buf);
-		kfree(ctr);
+	retval = seq_open(file, &cgroup_tasks_seq_operations);
+	if (retval) {
+		release_cgroup_pid_array(cgrp);
+		return retval;
 	}
+	((struct seq_file *)file->private_data)->private = cgrp;
 	return 0;
 }
 
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
 	{
 		.name = "tasks",
 		.open = cgroup_tasks_open,
-		.read = cgroup_tasks_read,
 		.write_u64 = cgroup_tasks_write,
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	mutex_lock(&cgroup_mutex);
 
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
-	INIT_LIST_HEAD(&cgrp->css_sets);
-	INIT_LIST_HEAD(&cgrp->release_list);
+	init_cgroup_housekeeping(cgrp);
 
 	cgrp->parent = parent;
 	cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c0..daca6209202 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&current->cgroups->ref.refcount);
+	count = atomic_read(&current->cgroups->refcount);
 	rcu_read_unlock();
 	return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] =  {
 	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
-	}
+	},
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 00000000000..e9505695449
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,379 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+	CGROUP_THAWED = 0,
+	CGROUP_FREEZING,
+	CGROUP_FROZEN,
+};
+
+struct freezer {
+	struct cgroup_subsys_state css;
+	enum freezer_state state;
+	spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+		struct cgroup *cgroup)
+{
+	return container_of(
+		cgroup_subsys_state(cgroup, freezer_subsys_id),
+		struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, freezer_subsys_id),
+			    struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	state = freezer->state;
+	task_unlock(task);
+
+	return state == CGROUP_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+	"THAWED",
+	"FREEZING",
+	"FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parentheses are state labels. The rest are edge labels.
+ *
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ *    ^ ^                    |                     |
+ *    | \_______THAWED_______/                     |
+ *    \__________________________THAWED____________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ * freezer->lock
+ *  sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock (to prevent races with freeze_task())
+ *     sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+						  struct cgroup *cgroup)
+{
+	struct freezer *freezer;
+
+	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+	if (!freezer)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&freezer->lock);
+	freezer->state = CGROUP_THAWED;
+	return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cgroup)
+{
+	kfree(cgroup_freezer(cgroup));
+}
+
+/* Task is frozen or will freeze immediately when next it gets woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+	return frozen(task) ||
+		(task_is_stopped_or_traced(task) && freezing(task));
+}
+
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
+static int freezer_can_attach(struct cgroup_subsys *ss,
+			      struct cgroup *new_cgroup,
+			      struct task_struct *task)
+{
+	struct freezer *freezer;
+	int retval;
+
+	/* Anything frozen can't move or be moved to/from */
+
+	if (is_task_frozen_enough(task))
+		return -EBUSY;
+
+	freezer = cgroup_freezer(new_cgroup);
+	if (freezer->state == CGROUP_FROZEN)
+		return -EBUSY;
+
+	retval = 0;
+	task_lock(task);
+	freezer = task_freezer(task);
+	if (freezer->state == CGROUP_FROZEN)
+		retval = -EBUSY;
+	task_unlock(task);
+	return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	struct freezer *freezer;
+
+	task_lock(task);
+	freezer = task_freezer(task);
+	task_unlock(task);
+
+	BUG_ON(freezer->state == CGROUP_FROZEN);
+	spin_lock_irq(&freezer->lock);
+	/* Locking avoids race with FREEZING -> THAWED transitions. */
+	if (freezer->state == CGROUP_FREEZING)
+		freeze_task(task, true);
+	spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void update_freezer_state(struct cgroup *cgroup,
+				 struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int nfrozen = 0, ntotal = 0;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		ntotal++;
+		if (is_task_frozen_enough(task))
+			nfrozen++;
+	}
+
+	/*
+	 * Transition to FROZEN when no new tasks can be added ensures
+	 * that we never exist in the FROZEN state while there are unfrozen
+	 * tasks.
+	 */
+	if (nfrozen == ntotal)
+		freezer->state = CGROUP_FROZEN;
+	else if (nfrozen > 0)
+		freezer->state = CGROUP_FREEZING;
+	else
+		freezer->state = CGROUP_THAWED;
+	cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+			struct seq_file *m)
+{
+	struct freezer *freezer;
+	enum freezer_state state;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	state = freezer->state;
+	if (state == CGROUP_FREEZING) {
+		/* We change from FREEZING to FROZEN lazily if the cgroup was
+		 * only partially frozen when we exited write. */
+		update_freezer_state(cgroup, freezer);
+		state = freezer->state;
+	}
+	spin_unlock_irq(&freezer->lock);
+	cgroup_unlock();
+
+	seq_puts(m, freezer_state_strs[state]);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+	unsigned int num_cant_freeze_now = 0;
+
+	freezer->state = CGROUP_FREEZING;
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		if (!freeze_task(task, true))
+			continue;
+		if (is_task_frozen_enough(task))
+			continue;
+		if (!freezing(task) && !freezer_should_skip(task))
+			num_cant_freeze_now++;
+	}
+	cgroup_iter_end(cgroup, &it);
+
+	return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+	struct cgroup_iter it;
+	struct task_struct *task;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((task = cgroup_iter_next(cgroup, &it))) {
+		int do_wake;
+
+		task_lock(task);
+		do_wake = __thaw_process(task);
+		task_unlock(task);
+		if (do_wake)
+			wake_up_process(task);
+	}
+	cgroup_iter_end(cgroup, &it);
+	freezer->state = CGROUP_THAWED;
+
+	return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+				enum freezer_state goal_state)
+{
+	struct freezer *freezer;
+	int retval = 0;
+
+	freezer = cgroup_freezer(cgroup);
+	spin_lock_irq(&freezer->lock);
+	update_freezer_state(cgroup, freezer);
+	if (goal_state == freezer->state)
+		goto out;
+	switch (freezer->state) {
+	case CGROUP_THAWED:
+		retval = try_to_freeze_cgroup(cgroup, freezer);
+		break;
+	case CGROUP_FREEZING:
+		if (goal_state == CGROUP_FROZEN) {
+			/* Userspace is retrying after
+			 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+			retval = try_to_freeze_cgroup(cgroup, freezer);
+			break;
+		}
+		/* state == FREEZING and goal_state == THAWED, so unfreeze */
+	case CGROUP_FROZEN:
+		retval = unfreeze_cgroup(cgroup, freezer);
+		break;
+	default:
+		break;
+	}
+out:
+	spin_unlock_irq(&freezer->lock);
+
+	return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+			 struct cftype *cft,
+			 const char *buffer)
+{
+	int retval;
+	enum freezer_state goal_state;
+
+	if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+		goal_state = CGROUP_THAWED;
+	else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+		goal_state = CGROUP_FROZEN;
+	else
+		return -EIO;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+	retval = freezer_change_state(cgroup, goal_state);
+	cgroup_unlock();
+	return retval;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "state",
+		.read_seq_string = freezer_read,
+		.write_string = freezer_write,
+	},
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+	.name		= "freezer",
+	.create		= freezer_create,
+	.destroy	= freezer_destroy,
+	.populate	= freezer_populate,
+	.subsys_id	= freezer_subsys_id,
+	.can_attach	= freezer_can_attach,
+	.attach		= NULL,
+	.fork		= freezer_fork,
+	.exit		= NULL,
+};
diff --git a/kernel/compat.c b/kernel/compat.c
index 143990e48cb..8eafe3eb50d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -208,49 +209,23 @@ asmlinkage long compat_sys_setitimer(int which,
 	return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
+		struct tms tms;
 		struct compat_tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time.  Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
-
-		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8..abaee684ecb 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
 
 #ifdef CONFIG_IKCONFIG_PROC
 
-/**************************************************/
-/* globals and useful constants                   */
-
 static ssize_t
 ikconfig_read_current(struct file *file, char __user *buf,
 		      size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
 	.read = ikconfig_read_current,
 };
 
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
 static int __init ikconfig_init(void)
 {
 	struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
 	return 0;
 }
 
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess           */
-
 static void __exit ikconfig_cleanup(void)
 {
 	remove_proc_entry("config.gz", NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e..3e00526f52e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 {
 	struct cpuset trialcs;
 	int err;
-	int cpus_nonempty, balance_flag_changed;
+	int balance_flag_changed;
 
 	trialcs = *cs;
 	if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	if (err < 0)
 		return err;
 
-	cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
 	balance_flag_changed = (is_sched_load_balance(cs) !=
 		 			is_sched_load_balance(&trialcs));
 
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpus_nonempty && balance_flag_changed)
+	if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
 		async_rebuild_sched_domains();
 
 	return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t");
-	m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
-					task->cpus_allowed);
+	seq_cpumask(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Cpus_allowed_list:\t");
-	m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
-					task->cpus_allowed);
+	seq_cpumask_list(m, &task->cpus_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
-	m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
-					task->mems_allowed);
+	seq_nodemask(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed_list:\t");
-	m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
-					task->mems_allowed);
+	seq_nodemask_list(m, &task->mems_allowed);
 	seq_printf(m, "\n");
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 0ef4673e351..80137a5d946 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -112,8 +113,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +121,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -149,7 +147,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-	put_task_struct(container_of(rhp, struct task_struct, rcu));
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	trace_sched_process_free(tsk);
+	put_task_struct(tsk);
 }
 
 
@@ -1073,6 +1074,8 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_sched_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
@@ -1301,6 +1304,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1316,20 +1320,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
+		thread_group_cputime(p, &cputime);
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(cputime.utime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(cputime.stime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
@@ -1674,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
 	struct task_struct *tsk;
 	int retval;
 
+	trace_sched_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 30de644a40c..4d093552dd6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -759,15 +760,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+	/* Thread group counters. */
+	thread_group_cputime_init(sig);
+
+	/* Expiration times and increments. */
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	/* Cached expiration times. */
+	sig->cputime_expires.prof_exp = cputime_zero;
+	sig->cputime_expires.virt_exp = cputime_zero;
+	sig->cputime_expires.sched_exp = 0;
+
+	/* The timer lists. */
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
-		return 0;
+		ret = thread_group_cputime_clone_thread(current);
+		if (likely(!ret)) {
+			atomic_inc(&current->signal->count);
+			atomic_inc(&current->signal->live);
+		}
+		return ret;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
@@ -795,40 +825,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
-
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = NULL;
 	sig->tty = NULL;
 
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		/*
-		 * New sole thread in the process gets an expiry time
-		 * of the whole CPU time limit.
-		 */
-		tsk->it_prof_expires =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-	}
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -838,6 +853,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+	thread_group_cputime_free(sig);
 	exit_thread_group_keys(sig);
 	tty_kref_put(sig->tty);
 	kmem_cache_free(signal_cachep, sig);
@@ -888,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif /* CONFIG_MM_OWNER */
 
 /*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.sched_exp = 0;
+	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
+/*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
  *
@@ -997,12 +1026,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	p->it_virt_expires = cputime_zero;
-	p->it_prof_expires = cputime_zero;
-	p->it_sched_expires = 0;
-	INIT_LIST_HEAD(&p->cpu_timers[0]);
-	INIT_LIST_HEAD(&p->cpu_timers[1]);
-	INIT_LIST_HEAD(&p->cpu_timers[2]);
+	posix_cpu_timers_init(p);
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1203,21 +1227,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD) {
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-		if (!cputime_eq(current->signal->it_virt_expires,
-				cputime_zero) ||
-		    !cputime_eq(current->signal->it_prof_expires,
-				cputime_zero) ||
-		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-		    !list_empty(&current->signal->cpu_timers[0]) ||
-		    !list_empty(&current->signal->cpu_timers[1]) ||
-		    !list_empty(&current->signal->cpu_timers[2])) {
-			/*
-			 * Have child wake up on its first tick to check
-			 * for process CPU timers.
-			 */
-			p->it_prof_expires = jiffies_to_cputime(1);
-		}
 	}
 
 	if (likely(p->pid)) {
@@ -1364,6 +1373,8 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		trace_sched_process_fork(current, p);
+
 		nr = task_pid_vnr(p);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 00000000000..ba6248b323e
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,154 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+	if (likely(!(current->flags & PF_NOFREEZE))) {
+		current->flags |= PF_FROZEN;
+		wmb();
+	}
+	clear_freeze_flag(current);
+}
+
+/* The refrigerator is the place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+	/* Hmm, should we be allowed to suspend when there are realtime
+	   processes around? */
+	long save;
+
+	task_lock(current);
+	if (freezing(current)) {
+		frozen_process();
+		task_unlock(current);
+	} else {
+		task_unlock(current);
+		return;
+	}
+	save = current->state;
+	pr_debug("%s entered refrigerator\n", current->comm);
+
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* We sent fake signal, clean it up */
+	spin_unlock_irq(&current->sighand->siglock);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!frozen(current))
+			break;
+		schedule();
+	}
+	pr_debug("%s left refrigerator\n", current->comm);
+	__set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->sighand->siglock, flags);
+	signal_wake_up(p, 0);
+	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ *	freeze_task - send a freeze request to given task
+ *	@p: task to send the request to
+ *	@sig_only: if set, the request will only be sent if the task has the
+ *		PF_FREEZER_NOSIG flag unset
+ *	Return value: 'false', if @sig_only is set and the task has
+ *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ *
+ *	The freeze request is sent by setting the task's TIF_FREEZE flag and
+ *	either sending a fake signal to it or waking it up, depending on whether
+ *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
+ *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ *	TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+	/*
+	 * We first check if the task is freezing and next if it has already
+	 * been frozen to avoid the race with frozen_process() which first marks
+	 * the task as frozen and next clears its TIF_FREEZE.
+	 */
+	if (!freezing(p)) {
+		rmb();
+		if (frozen(p))
+			return false;
+
+		if (!sig_only || should_send_signal(p))
+			set_freeze_flag(p);
+		else
+			return false;
+	}
+
+	if (should_send_signal(p)) {
+		if (!signal_pending(p))
+			fake_signal_wake_up(p);
+	} else if (sig_only) {
+		return false;
+	} else {
+		wake_up_state(p, TASK_INTERRUPTIBLE);
+	}
+
+	return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		clear_freeze_flag(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_and_wake(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails.  Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		return 1;
+	}
+	clear_freeze_flag(p);
+	return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+	task_lock(p);
+	if (__thaw_process(p) == 1) {
+		task_unlock(p);
+		wake_up_process(p);
+		return 1;
+	}
+	task_unlock(p);
+	return 0;
+}
+EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cdec83e722f..95978f48e03 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1403,9 +1403,7 @@ void hrtimer_run_queues(void)
 		if (!base->first)
 			continue;
 
-		if (base->get_softirq_time)
-			base->softirq_time = base->get_softirq_time();
-		else if (gettime) {
+		if (gettime) {
 			hrtimer_get_softirq_time(cpu_base);
 			gettime = 0;
 		}
@@ -1688,9 +1686,11 @@ static void migrate_hrtimers(int cpu)
 	new_base = &get_cpu_var(hrtimer_bases);
 
 	tick_cancel_sched_timer(cpu);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, so deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1703,8 +1703,7 @@ static void migrate_hrtimers(int cpu)
 		raise = 1;
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
 
 	if (raise)
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 533068cfb60..cc0f7321b8c 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -30,17 +30,16 @@ static DEFINE_MUTEX(probing_active);
 unsigned long probe_irq_on(void)
 {
 	struct irq_desc *desc;
-	unsigned long mask;
-	unsigned int i;
+	unsigned long mask = 0;
+	unsigned int status;
+	int i;
 
 	mutex_lock(&probing_active);
 	/*
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -68,9 +67,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -88,11 +85,7 @@ unsigned long probe_irq_on(void)
 	/*
 	 * Now filter out any obviously spurious interrupts
 	 */
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		unsigned int status;
-
-		desc = irq_desc + i;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -126,14 +119,11 @@ EXPORT_SYMBOL(probe_irq_on);
  */
 unsigned int probe_irq_mask(unsigned long val)
 {
-	unsigned int mask;
+	unsigned int status, mask = 0;
+	struct irq_desc *desc;
 	int i;
 
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		unsigned int status;
-
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -171,20 +161,19 @@ EXPORT_SYMBOL(probe_irq_mask);
  */
 int probe_irq_off(unsigned long val)
 {
-	int i, irq_found = 0, nr_irqs = 0;
-
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		unsigned int status;
+	int i, irq_found = 0, nr_of_irqs = 0;
+	struct irq_desc *desc;
+	unsigned int status;
 
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
 		if (status & IRQ_AUTODETECT) {
 			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
+				if (!nr_of_irqs)
 					irq_found = i;
-				nr_irqs++;
+				nr_of_irqs++;
 			}
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
@@ -193,7 +182,7 @@ int probe_irq_off(unsigned long val)
 	}
 	mutex_unlock(&probing_active);
 
-	if (nr_irqs > 1)
+	if (nr_of_irqs > 1)
 		irq_found = -irq_found;
 
 	return irq_found;
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3cd441ebf5d..4895fde4eb9 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,16 +24,15 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -57,15 +56,14 @@ void dynamic_irq_init(unsigned int irq)
  */
 void dynamic_irq_cleanup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -89,10 +87,10 @@ void dynamic_irq_cleanup(unsigned int irq)
  */
 int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -100,7 +98,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
@@ -111,27 +108,27 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 EXPORT_SYMBOL(set_irq_chip);
 
 /**
- *	set_irq_type - set the irq type for an irq
+ *	set_irq_type - set the irq trigger type for an irq
  *	@irq:	irq number
- *	@type:	interrupt type - see include/linux/interrupt.h
+ *	@type:	IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
 
-	desc = irq_desc + irq;
-	if (desc->chip->set_type) {
-		spin_lock_irqsave(&desc->lock, flags);
-		ret = desc->chip->set_type(irq, type);
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
+	if (type == IRQ_TYPE_NONE)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	ret = __irq_set_trigger(desc, irq, type);
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
@@ -145,16 +142,15 @@ EXPORT_SYMBOL(set_irq_type);
  */
 int set_irq_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -171,15 +167,15 @@ EXPORT_SYMBOL(set_irq_data);
  */
 int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_desc + irq;
+
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -197,10 +193,16 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS || !desc->chip) {
+	if (!desc) {
+		printk(KERN_ERR
+		       "Trying to install chip data for IRQ%d\n", irq);
+		return -EINVAL;
+	}
+
+	if (!desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -218,7 +220,7 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
@@ -236,8 +238,9 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	irq_desc[irq].chip->enable(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
 
+	desc->chip->enable(irq);
 	return 0;
 }
 
@@ -246,7 +249,7 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
@@ -305,14 +308,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
 	irqreturn_t action_ret;
-	const unsigned int cpu = smp_processor_id();
 
 	spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -344,7 +346,6 @@ out_unlock:
 void
 handle_level_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -354,7 +355,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -392,7 +393,6 @@ out_unlock:
 void
 handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -402,7 +402,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -451,8 +451,6 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-	const unsigned int cpu = smp_processor_id();
-
 	spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -468,8 +466,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -524,7 +521,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
@@ -541,17 +538,15 @@ void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	if (!handle)
 		handle = handle_bad_irq;
 	else if (desc->chip == &no_irq_chip) {
@@ -583,7 +578,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		desc->status &= ~IRQ_DISABLED;
 		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
 		desc->depth = 0;
-		desc->chip->unmask(irq);
+		desc->chip->startup(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
@@ -606,17 +601,14 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 
 void __init set_irq_noprobe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
-
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -624,17 +616,14 @@ void __init set_irq_noprobe(unsigned int irq)
 
 void __init set_irq_probe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
-
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 5fa6198e913..c815b42d0f5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -25,11 +25,10 @@
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void
-handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -47,6 +46,9 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  *
  * Controller mappings for all interrupt sources:
  */
+int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL_GPL(nr_irqs);
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -66,7 +68,9 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	print_irq_desc(irq, irq_desc + irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -131,8 +135,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	handle_dynamic_tick(action);
-
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
@@ -165,11 +167,12 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
  */
 unsigned int __do_IRQ(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned int status;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
+
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -256,8 +259,8 @@ out:
 }
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
@@ -265,10 +268,10 @@ static struct lock_class_key irq_desc_lock_class;
 
 void early_init_irq_lock_class(void)
 {
+	struct irq_desc *desc;
 	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
-		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+	for_each_irq_desc(i, desc)
+		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 }
-
 #endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 08a849a2244..c9767e64198 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -10,12 +10,15 @@ extern void irq_chip_set_defaults(struct irq_chip *chip);
 /* Set default handler: */
 extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 
+extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
+		unsigned long flags);
+
 #ifdef CONFIG_PROC_FS
-extern void register_irq_proc(unsigned int irq);
+extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
 #else
-static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void register_handler_proc(unsigned int irq,
 					 struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 60c49e32439..c498a1b8c62 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned int status;
 
-	if (irq >= NR_IRQS)
+	if (!desc)
 		return;
 
 	do {
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq);
  */
 int irq_can_set_affinity(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
 	    !desc->chip->set_affinity)
@@ -81,18 +81,17 @@ int irq_can_set_affinity(unsigned int irq)
  */
 int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
 
-	set_balance_irq_affinity(irq, cpumask);
-
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT) {
+	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&desc->lock, flags);
+		desc->affinity = cpumask;
 		desc->chip->set_affinity(irq, cpumask);
 		spin_unlock_irqrestore(&desc->lock, flags);
 	} else
@@ -111,16 +110,17 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 int irq_select_affinity(unsigned int irq)
 {
 	cpumask_t mask;
+	struct irq_desc *desc;
 
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
 	cpus_and(mask, cpu_online_map, irq_default_affinity);
 
-	irq_desc[irq].affinity = mask;
-	irq_desc[irq].chip->set_affinity(irq, mask);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+	desc->chip->set_affinity(irq, mask);
 
-	set_balance_irq_affinity(irq, mask);
 	return 0;
 }
 #endif
@@ -140,10 +140,10 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -169,9 +169,9 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (irq >= NR_IRQS)
+	if (!desc)
 		return;
 
 	disable_irq_nosync(irq);
@@ -211,10 +211,10 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -223,9 +223,9 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-int set_irq_wake_real(unsigned int irq, unsigned int on)
+static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	int ret = -ENXIO;
 
 	if (desc->chip->set_wake)
@@ -248,7 +248,7 @@ int set_irq_wake_real(unsigned int irq, unsigned int on)
  */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = 0;
 
@@ -288,12 +288,16 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 
-	if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
+	if (!desc)
+		return 0;
+
+	if (desc->status & IRQ_NOREQUEST)
 		return 0;
 
-	action = irq_desc[irq].action;
+	action = desc->action;
 	if (action)
 		if (irqflags & action->flags & IRQF_SHARED)
 			action = NULL;
@@ -312,10 +316,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
 		desc->handle_irq = NULL;
 }
 
-static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags)
 {
 	int ret;
+	struct irq_chip *chip = desc->chip;
 
 	if (!chip || !chip->set_type) {
 		/*
@@ -333,6 +338,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
 		pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
 				(int)(flags & IRQF_TRIGGER_MASK),
 				irq, chip->set_type);
+	else {
+		/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
+		desc->status &= ~IRQ_TYPE_SENSE_MASK;
+		desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+	}
 
 	return ret;
 }
@@ -341,16 +351,16 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
  */
-int setup_irq(unsigned int irq, struct irqaction *new)
+static int
+__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
 {
-	struct irq_desc *desc = irq_desc + irq;
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
 	int ret;
 
-	if (irq >= NR_IRQS)
+	if (!desc)
 		return -EINVAL;
 
 	if (desc->chip == &no_irq_chip)
@@ -411,7 +421,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
-			ret = __irq_set_trigger(desc->chip, irq, new->flags);
+			ret = __irq_set_trigger(desc, irq, new->flags);
 
 			if (ret) {
 				spin_unlock_irqrestore(&desc->lock, flags);
@@ -430,16 +440,21 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 		if (!(desc->status & IRQ_NOAUTOEN)) {
 			desc->depth = 0;
 			desc->status &= ~IRQ_DISABLED;
-			if (desc->chip->startup)
-				desc->chip->startup(irq);
-			else
-				desc->chip->enable(irq);
+			desc->chip->startup(irq);
 		} else
 			/* Undo nested disables: */
 			desc->depth = 1;
 
 		/* Set default affinity mask once everything is setup */
 		irq_select_affinity(irq);
+
+	} else if ((new->flags & IRQF_TRIGGER_MASK)
+			&& (new->flags & IRQF_TRIGGER_MASK)
+				!= (desc->status & IRQ_TYPE_SENSE_MASK)) {
+		/* hope the handler works with the actual trigger mode... */
+		pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
+				irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
+				(int)(new->flags & IRQF_TRIGGER_MASK));
 	}
 
 	*p = new;
@@ -464,7 +479,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	new->irq = irq;
-	register_irq_proc(irq);
+	register_irq_proc(irq, desc);
 	new->dir = NULL;
 	register_handler_proc(irq, new);
 
@@ -484,6 +499,20 @@ mismatch:
 }
 
 /**
+ *	setup_irq - set up an interrupt
+ *	@irq: Interrupt line to set up
+ *	@act: irqaction for the interrupt
+ *
+ * Used to statically set up interrupts in the early boot process.
+ */
+int setup_irq(unsigned int irq, struct irqaction *act)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return __setup_irq(irq, desc, act);
+}
+
+/**
  *	free_irq - free an interrupt
  *	@irq: Interrupt line to free
  *	@dev_id: Device identity to free
@@ -499,15 +528,15 @@ mismatch:
  */
 void free_irq(unsigned int irq, void *dev_id)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction **p;
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
-	if (irq >= NR_IRQS)
+
+	if (!desc)
 		return;
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	p = &desc->action;
 	for (;;) {
@@ -596,12 +625,14 @@ EXPORT_SYMBOL(free_irq);
  *	IRQF_SHARED		Interrupt is shared
  *	IRQF_DISABLED	Disable local interrupts while processing
  *	IRQF_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *	IRQF_TRIGGER_*		Specify active edge(s) or level
  *
  */
 int request_irq(unsigned int irq, irq_handler_t handler,
 		unsigned long irqflags, const char *devname, void *dev_id)
 {
 	struct irqaction *action;
+	struct irq_desc *desc;
 	int retval;
 
 #ifdef CONFIG_LOCKDEP
@@ -618,9 +649,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	 */
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
-	if (irq >= NR_IRQS)
+
+	desc = irq_to_desc(irq);
+	if (!desc)
 		return -EINVAL;
-	if (irq_desc[irq].status & IRQ_NOREQUEST)
+
+	if (desc->status & IRQ_NOREQUEST)
 		return -EINVAL;
 	if (!handler)
 		return -EINVAL;
@@ -636,26 +670,29 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
+	retval = __setup_irq(irq, desc, action);
+	if (retval)
+		kfree(action);
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	if (irqflags & IRQF_SHARED) {
 		/*
 		 * It's a shared IRQ -- the driver ought to be prepared for it
 		 * to happen immediately, so let's make sure....
-		 * We do this before actually registering it, to make sure that
-		 * a 'real' IRQ doesn't run in parallel with our fake
+		 * We disable the irq to make sure that a 'real' IRQ doesn't
+		 * run in parallel with our fake.
 		 */
 		unsigned long flags;
 
+		disable_irq(irq);
 		local_irq_save(flags);
+
 		handler(irq, dev_id);
+
 		local_irq_restore(flags);
+		enable_irq(irq);
 	}
 #endif
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 77b7acc875c..90b920d3f52 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -3,18 +3,18 @@
 
 void set_pending_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_MOVE_PENDING;
-	irq_desc[irq].pending_mask = mask;
+	desc->pending_mask = mask;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void move_masked_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
@@ -30,7 +30,7 @@ void move_masked_irq(int irq)
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
+	if (unlikely(cpus_empty(desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -38,7 +38,7 @@ void move_masked_irq(int irq)
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
+	cpus_and(tmp, desc->pending_mask, cpu_online_map);
 
 	/*
 	 * If there was a valid mask to work with, please
@@ -55,12 +55,12 @@ void move_masked_irq(int irq)
 	if (likely(!cpus_empty(tmp))) {
 		desc->chip->set_affinity(irq,tmp);
 	}
-	cpus_clear(irq_desc[irq].pending_mask);
+	cpus_clear(desc->pending_mask);
 }
 
 void move_native_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a09dd29c2fd..fac014a81b2 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
 
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-	struct irq_desc *desc = irq_desc + (long)m->private;
+	struct irq_desc *desc = irq_to_desc((long)m->private);
 	cpumask_t *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	cpumask_t new_value;
 	int err;
 
-	if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
+	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = {
 static int irq_spurious_read(char *page, char **start, off_t off,
 				  int count, int *eof, void *data)
 {
-	struct irq_desc *d = &irq_desc[(long) data];
+	struct irq_desc *desc = irq_to_desc((long) data);
 	return sprintf(page, "count %u\n"
 			     "unhandled %u\n"
 			     "last_unhandled %u ms\n",
-			d->irq_count,
-			d->irqs_unhandled,
-			jiffies_to_msecs(d->last_unhandled));
+			desc->irq_count,
+			desc->irqs_unhandled,
+			jiffies_to_msecs(desc->last_unhandled));
 }
 
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned long flags;
 	int ret = 1;
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 void register_handler_proc(unsigned int irq, struct irqaction *action)
 {
 	char name [MAX_NAMELEN];
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (!irq_desc[irq].dir || action->dir || !action->name ||
+	if (!desc->dir || action->dir || !action->name ||
 					!name_unique(irq, action))
 		return;
 
@@ -174,36 +175,34 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 	snprintf(name, MAX_NAMELEN, "%s", action->name);
 
 	/* create /proc/irq/1234/handler/ */
-	action->dir = proc_mkdir(name, irq_desc[irq].dir);
+	action->dir = proc_mkdir(name, desc->dir);
 }
 
 #undef MAX_NAMELEN
 
 #define MAX_NAMELEN 10
 
-void register_irq_proc(unsigned int irq)
+void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
 	char name [MAX_NAMELEN];
 	struct proc_dir_entry *entry;
 
-	if (!root_irq_dir ||
-		(irq_desc[irq].chip == &no_irq_chip) ||
-			irq_desc[irq].dir)
+	if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
 		return;
 
 	memset(name, 0, MAX_NAMELEN);
 	sprintf(name, "%d", irq);
 
 	/* create /proc/irq/1234 */
-	irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
+	desc->dir = proc_mkdir(name, root_irq_dir);
 
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
-	proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
+	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
 #endif
 
-	entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+	entry = create_proc_entry("spurious", 0444, desc->dir);
 	if (entry) {
 		entry->data = (void *)(long)irq;
 		entry->read_proc = irq_spurious_read;
@@ -214,8 +213,11 @@ void register_irq_proc(unsigned int irq)
 
 void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
-	if (action->dir)
-		remove_proc_entry(action->dir->name, irq_desc[irq].dir);
+	if (action->dir) {
+		struct irq_desc *desc = irq_to_desc(irq);
+
+		remove_proc_entry(action->dir->name, desc->dir);
+	}
 }
 
 void register_default_affinity_proc(void)
@@ -228,7 +230,8 @@ void register_default_affinity_proc(void)
 
 void init_irq_proc(void)
 {
-	int i;
+	unsigned int irq;
+	struct irq_desc *desc;
 
 	/* create /proc/irq */
 	root_irq_dir = proc_mkdir("irq", NULL);
@@ -240,7 +243,7 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for (i = 0; i < NR_IRQS; i++)
-		register_irq_proc(i);
+	for_each_irq_desc(irq, desc)
+		register_irq_proc(irq, desc);
 }
 
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index a8046791ba2..89c7117acf2 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -33,10 +33,10 @@ static void resend_irqs(unsigned long arg)
 	struct irq_desc *desc;
 	int irq;
 
-	while (!bitmap_empty(irqs_resend, NR_IRQS)) {
-		irq = find_first_bit(irqs_resend, NR_IRQS);
+	while (!bitmap_empty(irqs_resend, nr_irqs)) {
+		irq = find_first_bit(irqs_resend, nr_irqs);
 		clear_bit(irq, irqs_resend);
-		desc = irq_desc + irq;
+		desc = irq_to_desc(irq);
 		local_irq_disable();
 		desc->handle_irq(irq, desc);
 		local_irq_enable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index c66d3f10e85..dd364c11e56 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -12,83 +12,122 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/timer.h>
 
 static int irqfixup __read_mostly;
 
+#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
+static void poll_spurious_irqs(unsigned long dummy);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+
 /*
  * Recovery handler for misrouted interrupts.
  */
-static int misrouted_irq(int irq)
+static int try_one_irq(int irq, struct irq_desc *desc)
 {
-	int i;
-	int ok = 0;
-	int work = 0;	/* Did we do work for a real IRQ */
-
-	for (i = 1; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		struct irqaction *action;
-
-		if (i == irq)	/* Already tried */
-			continue;
+	struct irqaction *action;
+	int ok = 0, work = 0;
 
-		spin_lock(&desc->lock);
-		/* Already running on another processor */
-		if (desc->status & IRQ_INPROGRESS) {
-			/*
-			 * Already running: If it is shared get the other
-			 * CPU to go looking for our mystery interrupt too
-			 */
-			if (desc->action && (desc->action->flags & IRQF_SHARED))
-				desc->status |= IRQ_PENDING;
-			spin_unlock(&desc->lock);
-			continue;
-		}
-		/* Honour the normal IRQ locking */
-		desc->status |= IRQ_INPROGRESS;
-		action = desc->action;
+	spin_lock(&desc->lock);
+	/* Already running on another processor */
+	if (desc->status & IRQ_INPROGRESS) {
+		/*
+		 * Already running: If it is shared get the other
+		 * CPU to go looking for our mystery interrupt too
+		 */
+		if (desc->action && (desc->action->flags & IRQF_SHARED))
+			desc->status |= IRQ_PENDING;
 		spin_unlock(&desc->lock);
+		return ok;
+	}
+	/* Honour the normal IRQ locking */
+	desc->status |= IRQ_INPROGRESS;
+	action = desc->action;
+	spin_unlock(&desc->lock);
 
-		while (action) {
-			/* Only shared IRQ handlers are safe to call */
-			if (action->flags & IRQF_SHARED) {
-				if (action->handler(i, action->dev_id) ==
-						IRQ_HANDLED)
-					ok = 1;
-			}
-			action = action->next;
+	while (action) {
+		/* Only shared IRQ handlers are safe to call */
+		if (action->flags & IRQF_SHARED) {
+			if (action->handler(irq, action->dev_id) ==
+				IRQ_HANDLED)
+				ok = 1;
 		}
-		local_irq_disable();
-		/* Now clean up the flags */
-		spin_lock(&desc->lock);
-		action = desc->action;
+		action = action->next;
+	}
+	local_irq_disable();
+	/* Now clean up the flags */
+	spin_lock(&desc->lock);
+	action = desc->action;
 
+	/*
+	 * While we were looking for a fixup someone queued a real
+	 * IRQ clashing with our walk:
+	 */
+	while ((desc->status & IRQ_PENDING) && action) {
 		/*
-		 * While we were looking for a fixup someone queued a real
-		 * IRQ clashing with our walk:
-		 */
-		while ((desc->status & IRQ_PENDING) && action) {
-			/*
-			 * Perform real IRQ processing for the IRQ we deferred
-			 */
-			work = 1;
-			spin_unlock(&desc->lock);
-			handle_IRQ_event(i, action);
-			spin_lock(&desc->lock);
-			desc->status &= ~IRQ_PENDING;
-		}
-		desc->status &= ~IRQ_INPROGRESS;
-		/*
-		 * If we did actual work for the real IRQ line we must let the
-		 * IRQ controller clean up too
+		 * Perform real IRQ processing for the IRQ we deferred
 		 */
-		if (work && desc->chip && desc->chip->end)
-			desc->chip->end(i);
+		work = 1;
 		spin_unlock(&desc->lock);
+		handle_IRQ_event(irq, action);
+		spin_lock(&desc->lock);
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+	/*
+	 * If we did actual work for the real IRQ line we must let the
+	 * IRQ controller clean up too
+	 */
+	if (work && desc->chip && desc->chip->end)
+		desc->chip->end(irq);
+	spin_unlock(&desc->lock);
+
+	return ok;
+}
+
+static int misrouted_irq(int irq)
+{
+	struct irq_desc *desc;
+	int i, ok = 0;
+
+	for_each_irq_desc(i, desc) {
+		if (!i)
+			 continue;
+
+		if (i == irq)	/* Already tried */
+			continue;
+
+		if (try_one_irq(i, desc))
+			ok = 1;
 	}
 	/* So the caller can adjust the irq error counts */
 	return ok;
 }
 
+static void poll_spurious_irqs(unsigned long dummy)
+{
+	struct irq_desc *desc;
+	int i;
+
+	for_each_irq_desc(i, desc) {
+		unsigned int status;
+
+		if (!i)
+			 continue;
+
+		/* Racy but it doesn't matter */
+		status = desc->status;
+		barrier();
+		if (!(status & IRQ_SPURIOUS_DISABLED))
+			continue;
+
+		try_one_irq(i, desc);
+	}
+
+	mod_timer(&poll_spurious_irq_timer,
+		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -137,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
 	}
 }
 
-static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+static inline int
+try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
+		  irqreturn_t action_ret)
 {
 	struct irqaction *action;
 
@@ -212,6 +253,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
 		desc->depth++;
 		desc->chip->disable(irq);
+
+		mod_timer(&poll_spurious_irq_timer,
+			  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 	}
 	desc->irqs_unhandled = 0;
 }
@@ -241,7 +285,7 @@ static int __init irqfixup_setup(char *str)
 
 __setup("irqfixup", irqfixup_setup);
 module_param(irqfixup, int, 0644);
-MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9b..db7c358b9a0 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t utime = tsk->signal->utime;
-			do {
-				utime = cputime_add(utime, t->utime);
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime cputime;
+			cputime_t utime;
+
+			thread_group_cputime(tsk, &cputime);
+			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
 	case ITIMER_PROF:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t ptime = cputime_add(tsk->signal->utime,
-						      tsk->signal->stime);
-			do {
-				ptime = cputime_add(ptime,
-						    cputime_add(t->utime,
-								t->stime));
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime times;
+			cputime_t ptime;
+
+			thread_group_cputime(tsk, &times);
+			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
@@ -185,7 +176,6 @@ again:
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
 		tsk->signal->it_virt_expires = nval;
 		tsk->signal->it_virt_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
 	case ITIMER_PROF:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
 		tsk->signal->it_prof_expires = nval;
 		tsk->signal->it_prof_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd..ac0fde7b54d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
+#include <linux/vmalloc.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1371,6 +1372,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(node_online_map);
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
 	VMCOREINFO_SYMBOL(_stext);
+	VMCOREINFO_SYMBOL(vmlist);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 	VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1408,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(free_area, free_list);
 	VMCOREINFO_OFFSET(list_head, next);
 	VMCOREINFO_OFFSET(list_head, prev);
+	VMCOREINFO_OFFSET(vm_struct, addr);
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710..8e7a7ce3ed0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <trace/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -171,12 +172,11 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-	if (k->state != TASK_UNINTERRUPTIBLE) {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
 		WARN_ON(1);
 		return;
 	}
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	wait_task_inactive(k, 0);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
 	k->rt.nr_cpus_allowed = 1;
@@ -206,6 +206,8 @@ int kthread_stop(struct task_struct *k)
 	/* It could exit after stop_info.k set, but before wake_up_process. */
 	get_task_struct(k);
 
+	trace_sched_kthread_stop(k);
+
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
 	smp_wmb();
@@ -221,6 +223,8 @@ int kthread_stop(struct task_struct *k)
 	ret = kthread_stop_info.err;
 	mutex_unlock(&kthread_stop_lock);
 
+	trace_sched_kthread_stop_ret(ret);
+
 	return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/marker.c b/kernel/marker.c
index 7d1faecd7a5..e9c6b2bc940 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -62,7 +62,7 @@ struct marker_entry {
 	int refcount;	/* Number of times armed. 0 if disarmed. */
 	struct rcu_head rcu;
 	void *oldptr;
-	unsigned char rcu_pending:1;
+	int rcu_pending;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
@@ -103,11 +103,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 	char ptype;
 
 	/*
-	 * preempt_disable does two things : disabling preemption to make sure
-	 * the teardown of the callbacks can be done correctly when they are in
-	 * modules and they insure RCU read coherency.
+	 * rcu_read_lock_sched does two things: it disables preemption, so that
+	 * the teardown of the callbacks can be done correctly when they are in
+	 * modules, and it ensures RCU read coherency.
 	 */
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -145,7 +145,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 			va_end(args);
 		}
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -162,7 +162,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 	va_list args;	/* not initialized */
 	char ptype;
 
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -195,7 +195,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 			multi[i].func(multi[i].probe_private, call_private,
 				mdata->format, &args);
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
@@ -560,7 +560,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  * Disable a marker and its probe callback.
  * Note: only waiting an RCU period after setting elem->call to the empty
  * function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
  */
 static void disable_marker(struct marker *elem)
 {
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
 	entry = get_marker(name);
 	if (!entry) {
 		entry = add_marker(name, format);
-		if (IS_ERR(entry)) {
+		if (IS_ERR(entry))
 			ret = PTR_ERR(entry);
-			goto end;
-		}
+	} else if (format) {
+		if (!entry->format)
+			ret = marker_set_format(&entry, format);
+		else if (strcmp(entry->format, format))
+			ret = -EPERM;
 	}
+	if (ret)
+		goto end;
+
 	/*
 	 * If we detect that a call_rcu is pending for this marker,
 	 * make sure it's executed now.
@@ -674,6 +680,8 @@ int marker_probe_register(const char *name, const char *format,
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	entry->oldptr = old;
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
@@ -717,6 +725,8 @@ int marker_probe_unregister(const char *name,
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	entry->oldptr = old;
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
@@ -795,6 +805,8 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 	mutex_lock(&markers_mutex);
 	entry = get_marker_from_private_data(probe, probe_private);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	entry->oldptr = old;
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
diff --git a/kernel/module.c b/kernel/module.c
index 25bc9ac9e22..0d8d21ee792 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,8 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
+#include <linux/ftrace.h>
 
 #if 0
 #define DEBUGP printk
@@ -1430,6 +1432,9 @@ static void free_module(struct module *mod)
 	/* Module unload stuff */
 	module_unload_free(mod);
 
+	/* release any pointers to mcount in this module */
+	ftrace_release(mod->module_core, mod->core_size);
+
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1861,9 +1866,13 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
 	unsigned int verboseindex;
+	unsigned int tracepointsindex;
+	unsigned int tracepointsstringsindex;
+	unsigned int mcountindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+	void *mseg;
 	struct exception_table_entry *extable;
 	mm_segment_t old_fs;
 
@@ -2156,6 +2165,12 @@ static noinline struct module *load_module(void __user *umod,
  	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__markers_strings");
 	verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
+	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__tracepoints_strings");
+
+	mcountindex = find_sec(hdr, sechdrs, secstrings,
+			       "__mcount_loc");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2183,6 +2198,12 @@ static noinline struct module *load_module(void __user *umod,
 	mod->num_markers =
 		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+	mod->num_tracepoints =
+		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2201,12 +2222,22 @@ static noinline struct module *load_module(void __user *umod,
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+	if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
 	dynamic_printk_setup(sechdrs, verboseindex);
+#ifdef CONFIG_TRACEPOINTS
+		tracepoint_update_probe_range(mod->tracepoints,
+			mod->tracepoints + mod->num_tracepoints);
+#endif
+	}
+
+	/* sechdrs[0].sh_size is always zero */
+	mseg = (void *)sechdrs[mcountindex].sh_addr;
+	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2276,6 +2307,7 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
+	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 	module_free(mod, mod->module_init);
@@ -2759,3 +2791,50 @@ void module_update_markers(void)
 	mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			tracepoint_update_probe_range(mod->tracepoints,
+				mod->tracepoints + mod->num_tracepoints);
+	mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->tracepoint = NULL;
+			found = tracepoint_get_iter_range(&iter->tracepoint,
+				iter_mod->tracepoints,
+				iter_mod->tracepoints
+					+ iter_mod->num_tracepoints);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584e..4282c0a40a5 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36..153dcb2639c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,6 +7,93 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
+
+/*
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields.  Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
+ * thread to a thread group.  Assumes interrupts are enabled when called.
+ */
+int thread_group_cputime_alloc(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct task_cputime *cputime;
+
+	/*
+	 * If we have multiple threads and we don't already have a
+	 * per-CPU task_cputime struct (checked in the caller), allocate
+	 * one and fill it in with the times accumulated so far.  We may
+	 * race with another thread so recheck after we pick up the sighand
+	 * lock.
+	 */
+	cputime = alloc_percpu(struct task_cputime);
+	if (cputime == NULL)
+		return -ENOMEM;
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (sig->cputime.totals) {
+		spin_unlock_irq(&tsk->sighand->siglock);
+		free_percpu(cputime);
+		return 0;
+	}
+	sig->cputime.totals = cputime;
+	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
+	cputime->utime = tsk->utime;
+	cputime->stime = tsk->stime;
+	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
+	spin_unlock_irq(&tsk->sighand->siglock);
+	return 0;
+}
+
+/**
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
+ *
+ * @tsk:	The task we use to identify the thread group.
+ * @times:	task_cputime structure in which we return the summed fields.
+ *
+ * Walk the list of CPUs to sum the per-CPU time fields in the thread group
+ * time structure.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct signal_struct *sig;
+	int i;
+	struct task_cputime *tot;
+
+	sig = tsk->signal;
+	/* No signal struct or no per-CPU totals: use this task's times. */
+	if (unlikely(!sig) || !sig->cputime.totals) {
+		times->utime = tsk->utime;
+		times->stime = tsk->stime;
+		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
+		return;
+	}
+	times->stime = times->utime = cputime_zero;
+	times->sum_exec_runtime = 0;
+	for_each_possible_cpu(i) {
+		/* Use the sig snapshot checked above rather than re-reading it. */
+		tot = per_cpu_ptr(sig->cputime.totals, i);
+		times->utime = cputime_add(times->utime, tot->utime);
+		times->stime = cputime_add(times->stime, tot->stime);
+		times->sum_exec_runtime += tot->sum_exec_runtime;
+	}
+}
+
+/*
+ * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ */
+void update_rlimit_cpu(unsigned long rlim_new)
+{
+	cputime_t cputime = secs_to_cputime(rlim_new);
+
+	/* Re-arm only if nothing is armed or it would fire after the limit. */
+	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+	    cputime_gt(current->signal->it_prof_expires, cputime)) {
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
 
 static int check_clock(const clockid_t which_clock)
 {
@@ -158,10 +245,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
 {
 	return p->utime;
 }
-static inline unsigned long long sched_ns(struct task_struct *p)
-{
-	return task_sched_runtime(p);
-}
 
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 {
@@ -211,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = sched_ns(p);
+		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -220,59 +303,30 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
-					 struct task_struct *p,
-					 union cpu_time_count *cpu)
+static int cpu_clock_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
 {
-	struct task_struct *t = p;
- 	switch (clock_idx) {
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = p->signal->utime;
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->signal->sum_sched_runtime;
-		/* Add in each other live thread.  */
-		while ((t = next_thread(t)) != p) {
-			cpu->sched += t->se.sum_exec_runtime;
-		}
-		cpu->sched += sched_ns(p);
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-					    cpu);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -471,80 +525,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, tsk->signal->utime),
-		       cputime_add(tsk->stime, tsk->signal->stime),
-		     tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
-}
+	struct task_cputime cputime;
 
-
-/*
- * Set the expiry times of all the threads in the process so one of them
- * will go off before the process cumulative expiry total is reached.
- */
-static void process_timer_rebalance(struct task_struct *p,
-				    unsigned int clock_idx,
-				    union cpu_time_count expires,
-				    union cpu_time_count val)
-{
-	cputime_t ticks, left;
-	unsigned long long ns, nsleft;
- 	struct task_struct *t = p;
-	unsigned int nthreads = atomic_read(&p->signal->live);
-
-	if (!nthreads)
-		return;
-
-	switch (clock_idx) {
-	default:
-		BUG();
-		break;
-	case CPUCLOCK_PROF:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(prof_ticks(t), left);
-				if (cputime_eq(t->it_prof_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_prof_expires, ticks)) {
-					t->it_prof_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_VIRT:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(virt_ticks(t), left);
-				if (cputime_eq(t->it_virt_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_virt_expires, ticks)) {
-					t->it_virt_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_SCHED:
-		nsleft = expires.sched - val.sched;
-		do_div(nsleft, nthreads);
-		nsleft = max_t(unsigned long long, nsleft, 1);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ns = t->se.sum_exec_runtime + nsleft;
-				if (t->it_sched_expires == 0 ||
-				    t->it_sched_expires > ns) {
-					t->it_sched_expires = ns;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	}
+	thread_group_cputime(tsk, &cputime);
+	cleanup_timers(tsk->signal->cpu_timers,
+		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
 
 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +593,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_PROF:
-				if (cputime_eq(p->it_prof_expires,
+				if (cputime_eq(p->cputime_expires.prof_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_prof_expires,
+				    cputime_gt(p->cputime_expires.prof_exp,
 					       nt->expires.cpu))
-					p->it_prof_expires = nt->expires.cpu;
+					p->cputime_expires.prof_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_VIRT:
-				if (cputime_eq(p->it_virt_expires,
+				if (cputime_eq(p->cputime_expires.virt_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_virt_expires,
+				    cputime_gt(p->cputime_expires.virt_exp,
 					       nt->expires.cpu))
-					p->it_virt_expires = nt->expires.cpu;
+					p->cputime_expires.virt_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_SCHED:
-				if (p->it_sched_expires == 0 ||
-				    p->it_sched_expires > nt->expires.sched)
-					p->it_sched_expires = nt->expires.sched;
+				if (p->cputime_expires.sched_exp == 0 ||
+				    p->cputime_expires.sched_exp >
+							nt->expires.sched)
+					p->cputime_expires.sched_exp =
+						nt->expires.sched;
 				break;
 			}
 		} else {
 			/*
-			 * For a process timer, we must balance
-			 * all the live threads' expirations.
+			 * For a process timer, set the cached expiration time.
 			 */
 			switch (CPUCLOCK_WHICH(timer->it_clock)) {
 			default:
@@ -641,7 +629,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				    cputime_lt(p->signal->it_virt_expires,
 					       timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.virt_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_PROF:
 				if (!cputime_eq(p->signal->it_prof_expires,
 						cputime_zero) &&
@@ -652,13 +642,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				if (i != RLIM_INFINITY &&
 				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.prof_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_SCHED:
-			rebalance:
-				process_timer_rebalance(
-					timer->it.cpu.task,
-					CPUCLOCK_WHICH(timer->it_clock),
-					timer->it.cpu.expires, now);
+				p->signal->cputime_expires.sched_exp =
+					timer->it.cpu.expires.sched;
 				break;
 			}
 		}
@@ -969,13 +958,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	struct signal_struct *const sig = tsk->signal;
 
 	maxfire = 20;
-	tsk->it_prof_expires = cputime_zero;
+	tsk->cputime_expires.prof_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
-			tsk->it_prof_expires = t->expires.cpu;
+			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -984,13 +973,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_virt_expires = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
-			tsk->it_virt_expires = t->expires.cpu;
+			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -999,13 +988,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_sched_expires = 0;
+	tsk->cputime_expires.sched_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-			tsk->it_sched_expires = t->expires.sched;
+			tsk->cputime_expires.sched_exp = t->expires.sched;
 			break;
 		}
 		t->firing = 1;
@@ -1055,10 +1044,10 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	int maxfire;
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime, ptime, virt_expires, prof_expires;
+	cputime_t utime, ptime, virt_expires, prof_expires;
 	unsigned long long sum_sched_runtime, sched_expires;
-	struct task_struct *t;
 	struct list_head *timers = sig->cpu_timers;
+	struct task_cputime cputime;
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1063,10 @@ static void check_process_timers(struct task_struct *tsk,
 	/*
 	 * Collect the current process totals.
 	 */
-	utime = sig->utime;
-	stime = sig->stime;
-	sum_sched_runtime = sig->sum_sched_runtime;
-	t = tsk;
-	do {
-		utime = cputime_add(utime, t->utime);
-		stime = cputime_add(stime, t->stime);
-		sum_sched_runtime += t->se.sum_exec_runtime;
-		t = next_thread(t);
-	} while (t != tsk);
-	ptime = cputime_add(utime, stime);
-
+	thread_group_cputime(tsk, &cputime);
+	utime = cputime.utime;
+	ptime = cputime_add(utime, cputime.stime);
+	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	prof_expires = cputime_zero;
 	while (!list_empty(timers)) {
@@ -1193,60 +1174,18 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (!cputime_eq(prof_expires, cputime_zero) ||
-	    !cputime_eq(virt_expires, cputime_zero) ||
-	    sched_expires != 0) {
-		/*
-		 * Rebalance the threads' expiry times for the remaining
-		 * process CPU timers.
-		 */
-
-		cputime_t prof_left, virt_left, ticks;
-		unsigned long long sched_left, sched;
-		const unsigned int nthreads = atomic_read(&sig->live);
-
-		if (!nthreads)
-			return;
-
-		prof_left = cputime_sub(prof_expires, utime);
-		prof_left = cputime_sub(prof_left, stime);
-		prof_left = cputime_div_non_zero(prof_left, nthreads);
-		virt_left = cputime_sub(virt_expires, utime);
-		virt_left = cputime_div_non_zero(virt_left, nthreads);
-		if (sched_expires) {
-			sched_left = sched_expires - sum_sched_runtime;
-			do_div(sched_left, nthreads);
-			sched_left = max_t(unsigned long long, sched_left, 1);
-		} else {
-			sched_left = 0;
-		}
-		t = tsk;
-		do {
-			if (unlikely(t->flags & PF_EXITING))
-				continue;
-
-			ticks = cputime_add(cputime_add(t->utime, t->stime),
-					    prof_left);
-			if (!cputime_eq(prof_expires, cputime_zero) &&
-			    (cputime_eq(t->it_prof_expires, cputime_zero) ||
-			     cputime_gt(t->it_prof_expires, ticks))) {
-				t->it_prof_expires = ticks;
-			}
-
-			ticks = cputime_add(t->utime, virt_left);
-			if (!cputime_eq(virt_expires, cputime_zero) &&
-			    (cputime_eq(t->it_virt_expires, cputime_zero) ||
-			     cputime_gt(t->it_virt_expires, ticks))) {
-				t->it_virt_expires = ticks;
-			}
-
-			sched = t->se.sum_exec_runtime + sched_left;
-			if (sched_expires && (t->it_sched_expires == 0 ||
-					      t->it_sched_expires > sched)) {
-				t->it_sched_expires = sched;
-			}
-		} while ((t = next_thread(t)) != tsk);
-	}
+	if (!cputime_eq(prof_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
+		sig->cputime_expires.prof_exp = prof_expires;
+	if (!cputime_eq(virt_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
+		sig->cputime_expires.virt_exp = virt_expires;
+	if (sched_expires != 0 &&
+	    (sig->cputime_expires.sched_exp == 0 ||
+	     sig->cputime_expires.sched_exp > sched_expires))
+		sig->cputime_expires.sched_exp = sched_expires;
 }
 
 /*
@@ -1314,6 +1253,86 @@ out:
 	++timer->it_requeue_pending;
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:	The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (cputime_eq(cputime->utime, cputime_zero) &&
+	    cputime_eq(cputime->stime, cputime_zero) &&
+	    cputime->sum_exec_runtime == 0)
+		return 1;
+	return 0;
+}
+
+/**
+ * task_cputime_expired - Compare two task_cputime entities.
+ *
+ * @sample:	The task_cputime structure to be checked for expiration.
+ * @expires:	Expiration times, against which @sample will be checked.
+ *
+ * Checks @sample against @expires to see if any set (nonzero) limit in
+ * @expires has been reached (>=).  The @expires->stime limit is compared
+ * against @sample's utime + stime.  Returns true if so, otherwise false.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+					const struct task_cputime *expires)
+{
+	if (!cputime_eq(expires->utime, cputime_zero) &&
+	    cputime_ge(sample->utime, expires->utime))
+		return 1;
+	if (!cputime_eq(expires->stime, cputime_zero) &&
+	    cputime_ge(cputime_add(sample->utime, sample->stime),
+		       expires->stime))
+		return 1;
+	if (expires->sum_exec_runtime != 0 &&
+	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
+		return 1;
+	return 0;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:	The task (thread) being checked.
+ *
+ * Check the task and thread group timers.  If both are zero (there are no
+ * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times.  Return
+ * true if a timer has expired, else return false.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+
+	if (unlikely(!sig))
+		return 0;
+
+	if (!task_cputime_zero(&tsk->cputime_expires)) {
+		struct task_cputime task_sample = {
+			.utime = tsk->utime,
+			.stime = tsk->stime,
+			.sum_exec_runtime = tsk->se.sum_exec_runtime
+		};
+
+		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+			return 1;
+	}
+	if (!task_cputime_zero(&sig->cputime_expires)) {
+		struct task_cputime group_sample;
+
+		thread_group_cputime(tsk, &group_sample);
+		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+			return 1;
+	}
+	return 0;
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1326,42 +1345,31 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 	BUG_ON(!irqs_disabled());
 
-#define UNEXPIRED(clock) \
-		(cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
-		 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
-
-	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
-	    (tsk->it_sched_expires == 0 ||
-	     tsk->se.sum_exec_runtime < tsk->it_sched_expires))
+	/*
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers.  If that's so, just return.
+	 */
+	if (!fastpath_timer_check(tsk))
 		return;
 
-#undef	UNEXPIRED
-
+	spin_lock(&tsk->sighand->siglock);
 	/*
-	 * Double-check with locks held.
+	 * Here we take off tsk->signal->cpu_timers[N] and
+	 * tsk->cpu_timers[N] all the timers that are firing, and
+	 * put them on the firing list.
 	 */
-	read_lock(&tasklist_lock);
-	if (likely(tsk->signal != NULL)) {
-		spin_lock(&tsk->sighand->siglock);
+	check_thread_timers(tsk, &firing);
+	check_process_timers(tsk, &firing);
 
-		/*
-		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-		 * all the timers that are firing, and put them on the firing list.
-		 */
-		check_thread_timers(tsk, &firing);
-		check_process_timers(tsk, &firing);
-
-		/*
-		 * We must release these locks before taking any timer's lock.
-		 * There is a potential race with timer deletion here, as the
-		 * siglock now protects our private firing list.  We have set
-		 * the firing flag in each timer, so that a deletion attempt
-		 * that gets the timer lock before we do will give it up and
-		 * spin until we've taken care of that timer below.
-		 */
-		spin_unlock(&tsk->sighand->siglock);
-	}
-	read_unlock(&tasklist_lock);
+	/*
+	 * We must release these locks before taking any timer's lock.
+	 * There is a potential race with timer deletion here, as the
+	 * siglock now protects our private firing list.  We have set
+	 * the firing flag in each timer, so that a deletion attempt
+	 * that gets the timer lock before we do will give it up and
+	 * spin until we've taken care of that timer below.
+	 */
+	spin_unlock(&tsk->sighand->siglock);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1397,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 /*
  * Set one of the process-wide special case CPU timers.
- * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
- * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
- * absolute; non-null for ITIMER_*, where *newval is relative and we update
- * it to be absolute, *oldval is absolute and we update it to be relative.
+ * The tsk->sighand->siglock must be held by the caller.
+ * The *newval argument is relative and we update it to be absolute, *oldval
+ * is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
@@ -1401,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+	cpu_clock_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
@@ -1435,13 +1442,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	    cputime_ge(list_first_entry(head,
 				  struct cpu_timer_list, entry)->expires.cpu,
 		       *newval)) {
-		/*
-		 * Rejigger each thread's expiry time so that one will
-		 * notice before we hit the process-cumulative expiry time.
-		 */
-		union cpu_time_count expires = { .sched = 0 };
-		expires.cpu = *newval;
-		process_timer_rebalance(tsk, clock_idx, expires, now);
+		switch (clock_idx) {
+		case CPUCLOCK_PROF:
+			tsk->signal->cputime_expires.prof_exp = *newval;
+			break;
+		case CPUCLOCK_VIRT:
+			tsk->signal->cputime_expires.virt_exp = *newval;
+			break;
+		}
 	}
 }
 
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5131e547116..b931d7cedbf 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -223,6 +223,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
 }
 
 /*
+ * Get raw monotonic time for posix timers
+ */
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+{
+	getrawmonotonic(tp);
+	return 0;
+}
+
+/*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
 static __init int init_posix_timers(void)
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void)
 		.clock_get = posix_ktime_get_ts,
 		.clock_set = do_posix_clock_nosettime,
 	};
+	struct k_clock clock_monotonic_raw = {
+		.clock_getres = hrtimer_get_res,
+		.clock_get = posix_get_monotonic_raw,
+		.clock_set = do_posix_clock_nosettime,
+	};
 
 	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+	register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -298,6 +313,7 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
+	int shared, ret;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -311,25 +327,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	timr->sigq->info.si_signo = timr->it_sigev_signo;
-	timr->sigq->info.si_code = SI_TIMER;
-	timr->sigq->info.si_tid = timr->it_id;
-	timr->sigq->info.si_value = timr->it_sigev_value;
-
-	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-		struct task_struct *leader;
-		int ret = send_sigqueue(timr->sigq, timr->it_process, 0);
-
-		if (likely(ret >= 0))
-			return ret;
-
-		timr->it_sigev_notify = SIGEV_SIGNAL;
-		leader = timr->it_process->group_leader;
-		put_task_struct(timr->it_process);
-		timr->it_process = leader;
-	}
-
-	return send_sigqueue(timr->sigq, timr->it_process, 1);
+	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	/* If we failed to send the signal the timer stops. */
+	return ret > 0;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
@@ -468,11 +469,9 @@ sys_timer_create(const clockid_t which_clock,
 		 struct sigevent __user *timer_event_spec,
 		 timer_t __user * created_timer_id)
 {
-	int error = 0;
-	struct k_itimer *new_timer = NULL;
-	int new_timer_id;
-	struct task_struct *process = NULL;
-	unsigned long flags;
+	struct k_itimer *new_timer;
+	int error, new_timer_id;
+	struct task_struct *process;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -490,12 +489,11 @@ sys_timer_create(const clockid_t which_clock,
 		goto out;
 	}
 	spin_lock_irq(&idr_lock);
-	error = idr_get_new(&posix_timers_id, (void *) new_timer,
-			    &new_timer_id);
+	error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id);
 	spin_unlock_irq(&idr_lock);
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error) {
+	if (error) {
+		if (error == -EAGAIN)
+			goto retry;
 		/*
 		 * Weird looking, but we return EAGAIN if the IDR is
 		 * full (proper POSIX return value for this)
@@ -526,67 +524,43 @@ sys_timer_create(const clockid_t which_clock,
 			error = -EFAULT;
 			goto out;
 		}
-		new_timer->it_sigev_notify = event.sigev_notify;
-		new_timer->it_sigev_signo = event.sigev_signo;
-		new_timer->it_sigev_value = event.sigev_value;
-
-		read_lock(&tasklist_lock);
-		if ((process = good_sigevent(&event))) {
-			/*
-			 * We may be setting up this process for another
-			 * thread.  It may be exiting.  To catch this
-			 * case the we check the PF_EXITING flag.  If
-			 * the flag is not set, the siglock will catch
-			 * him before it is too late (in exit_itimers).
-			 *
-			 * The exec case is a bit more invloved but easy
-			 * to code.  If the process is in our thread
-			 * group (and it must be or we would not allow
-			 * it here) and is doing an exec, it will cause
-			 * us to be killed.  In this case it will wait
-			 * for us to die which means we can finish this
-			 * linkage with our last gasp. I.e. no code :)
-			 */
-			spin_lock_irqsave(&process->sighand->siglock, flags);
-			if (!(process->flags & PF_EXITING)) {
-				new_timer->it_process = process;
-				list_add(&new_timer->list,
-					 &process->signal->posix_timers);
-				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-					get_task_struct(process);
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-			} else {
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-				process = NULL;
-			}
-		}
-		read_unlock(&tasklist_lock);
+		rcu_read_lock();
+		process = good_sigevent(&event);
+		if (process)
+			get_task_struct(process);
+		rcu_read_unlock();
 		if (!process) {
 			error = -EINVAL;
 			goto out;
 		}
 	} else {
-		new_timer->it_sigev_notify = SIGEV_SIGNAL;
-		new_timer->it_sigev_signo = SIGALRM;
-		new_timer->it_sigev_value.sival_int = new_timer->it_id;
+		event.sigev_notify = SIGEV_SIGNAL;
+		event.sigev_signo = SIGALRM;
+		event.sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
-		spin_lock_irqsave(&process->sighand->siglock, flags);
-		new_timer->it_process = process;
-		list_add(&new_timer->list, &process->signal->posix_timers);
-		spin_unlock_irqrestore(&process->sighand->siglock, flags);
+		get_task_struct(process);
 	}
 
+	new_timer->it_sigev_notify     = event.sigev_notify;
+	new_timer->sigq->info.si_signo = event.sigev_signo;
+	new_timer->sigq->info.si_value = event.sigev_value;
+	new_timer->sigq->info.si_tid   = new_timer->it_id;
+	new_timer->sigq->info.si_code  = SI_TIMER;
+
+	spin_lock_irq(&current->sighand->siglock);
+	new_timer->it_process = process;
+	list_add(&new_timer->list, &current->signal->posix_timers);
+	spin_unlock_irq(&current->sighand->siglock);
+
+	return 0;
  	/*
 	 * In the case of the timer belonging to another task, after
 	 * the task is unlocked, the timer is owned by the other task
 	 * and may cease to exist at any time.  Don't use or modify
 	 * new_timer after the unlock call.
 	 */
-
 out:
-	if (error)
-		release_posix_timer(new_timer, it_id_set);
-
+	release_posix_timer(new_timer, it_id_set);
 	return error;
 }
 
@@ -597,7 +571,7 @@ out:
  * the find to the timer lock.  To avoid a dead lock, the timer id MUST
  * be release with out holding the timer lock.
  */
-static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
 	/*
@@ -605,23 +579,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	 * flags part over to the timer lock.  Must not let interrupts in
 	 * while we are moving the lock.
 	 */
-
 	spin_lock_irqsave(&idr_lock, *flags);
-	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
+	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-
-		if ((timr->it_id != timer_id) || !(timr->it_process) ||
-				!same_thread_group(timr->it_process, current)) {
-			spin_unlock(&timr->it_lock);
-			spin_unlock_irqrestore(&idr_lock, *flags);
-			timr = NULL;
-		} else
+		if (timr->it_process &&
+		    same_thread_group(timr->it_process, current)) {
 			spin_unlock(&idr_lock);
-	} else
-		spin_unlock_irqrestore(&idr_lock, *flags);
+			return timr;
+		}
+		spin_unlock(&timr->it_lock);
+	}
+	spin_unlock_irqrestore(&idr_lock, *flags);
 
-	return timr;
+	return NULL;
 }
 
 /*
@@ -862,8 +833,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
@@ -890,8 +860,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf..ca634019497 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
 	return 1;
 }
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-	if (!unlikely(current->flags & PF_NOFREEZE)) {
-		current->flags |= PF_FROZEN;
-		wmb();
-	}
-	clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
-	long save;
-
-	task_lock(current);
-	if (freezing(current)) {
-		frozen_process();
-		task_unlock(current);
-	} else {
-		task_unlock(current);
-		return;
-	}
-	save = current->state;
-	pr_debug("%s entered refrigerator\n", current->comm);
-
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
-			break;
-		schedule();
-	}
-	pr_debug("%s left refrigerator\n", current->comm);
-	__set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 0);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
-	return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
-	/*
-	 * We first check if the task is freezing and next if it has already
-	 * been frozen to avoid the race with frozen_process() which first marks
-	 * the task as frozen and next clears its TIF_FREEZE.
-	 */
-	if (!freezing(p)) {
-		rmb();
-		if (frozen(p))
-			return false;
-
-		if (!sig_only || should_send_signal(p))
-			set_freeze_flag(p);
-		else
-			return false;
-	}
-
-	if (should_send_signal(p)) {
-		if (!signal_pending(p))
-			fake_signal_wake_up(p);
-	} else if (sig_only) {
-		return false;
-	} else {
-		wake_up_state(p, TASK_INTERRUPTIBLE);
-	}
-
-	return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		recalc_sigpending_and_wake(p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	}
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
 	struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
 		if (nosig_only && should_send_signal(p))
 			continue;
 
+		if (cgroup_frozen(p))
+			continue;
+
 		thaw_process(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
-EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d5..1e68e4c39e2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
 {
 	spin_lock(&child->sighand->siglock);
 	if (task_is_traced(child)) {
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa..59236e8b9da 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
 #include <linux/cpu.h>
 #include <linux/random.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
 
 /*
  * PREEMPT_RCU data structures.
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 90b5b123f7a..85cb90588a5 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -42,10 +42,10 @@
 #include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0..d906f72b42d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		 * just go back and repeat.
 		 */
 		rq = task_rq_lock(p, &flags);
+		trace_sched_wait_task(rq, p);
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
 		ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
 	success = 1;
 
 out_running:
-	trace_mark(kernel_sched_wakeup,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup(rq, p);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		p->sched_class->task_new(rq, p);
 		inc_nr_running(rq);
 	}
-	trace_mark(kernel_sched_wakeup_new,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup_new(rq, p);
 	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	struct mm_struct *mm, *oldmm;
 
 	prepare_task_switch(rq, prev, next);
-	trace_mark(kernel_sched_schedule,
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		prev->pid, next->pid, prev->state,
-		rq, prev, next);
+	trace_sched_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
+	trace_sched_migrate_task(rq, p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
@@ -4052,23 +4047,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
+ * Return the sched_clock ns that have not yet been banked for @p if that
+ * task is currently running, or 0 otherwise.
  */
-unsigned long long task_sched_runtime(struct task_struct *p)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
-	u64 ns, delta_exec;
 	struct rq *rq;
+	u64 ns = 0;
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime;
+
 	if (task_current(rq, p)) {
+		u64 delta_exec;
+
 		update_rq_clock(rq);
 		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
-			ns += delta_exec;
+			ns = delta_exec;
 	}
+
 	task_rq_unlock(rq, &flags);
 
 	return ns;
@@ -4085,6 +4083,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 	cputime64_t tmp;
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4109,6 +4108,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
 	tmp = cputime_to_cputime64(cputime);
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4144,6 +4144,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	p->stime = cputime_add(p->stime, cputime);
+	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4185,6 +4186,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
+		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 18fd17172eb..f604dae7131 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -449,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		struct task_struct *curtask = task_of(curr);
 
 		cpuacct_charge(curtask, delta_exec);
+		account_group_exec_runtime(curtask, delta_exec);
 	}
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index cdf5740ab03..b446dc87494 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -526,6 +526,8 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
@@ -1458,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e..b8c156979cf 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,89 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next)		do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+/**
+ * account_group_user_time - Maintain utime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the utime field of the
+ *		thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					   cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * account_group_system_time - Maintain stime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the stime field of the
+ *		thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void account_group_system_time(struct task_struct *tsk,
+					     cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of the thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340..105217da5c8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <trace/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	struct sigpending *pending;
 	struct sigqueue *q;
 
+	trace_sched_signal_send(sig, t);
+
 	assert_spin_locked(&t->sighand->siglock);
 	if (!prepare_signal(sig, t))
 		return 0;
@@ -1338,6 +1341,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	struct siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
+	struct task_cputime cputime;
 	int ret = sig;
 
 	BUG_ON(sig == -1);
@@ -1368,10 +1372,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_uid = tsk->uid;
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-						       tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-						       tsk->signal->stime));
+	thread_group_cputime(tsk, &cputime);
+	info.si_utime = cputime_to_jiffies(cputime.utime);
+	info.si_stime = cputime_to_jiffies(cputime.stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 83ba21a13bd..7110daeb9a9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -267,16 +267,12 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-#ifdef CONFIG_NO_HZ
 	int cpu = smp_processor_id();
+
 	if (idle_cpu(cpu) && !in_interrupt())
-		tick_nohz_stop_idle(cpu);
-#endif
+		tick_check_idle(cpu);
+
 	__irq_enter();
-#ifdef CONFIG_NO_HZ
-	if (idle_cpu(cpu))
-		tick_nohz_update_jiffies();
-#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/sys.c b/kernel/sys.c
index 0bc8fa3c228..53879cdae48 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+void do_sys_times(struct tms *tms)
+{
+	struct task_cputime cputime;
+	cputime_t cutime, cstime;
+
+	spin_lock_irq(&current->sighand->siglock);
+	thread_group_cputime(current, &cputime);
+	cutime = current->signal->cutime;
+	cstime = current->signal->cstime;
+	spin_unlock_irq(&current->sighand->siglock);
+	tms->tms_utime = cputime_to_clock_t(cputime.utime);
+	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_cutime = cputime_to_clock_t(cutime);
+	tms->tms_cstime = cputime_to_clock_t(cstime);
+}
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
 		struct tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		spin_lock_irq(&tsk->sighand->siglock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-
-		tmp.tms_utime = cputime_to_clock_t(utime);
-		tmp.tms_stime = cputime_to_clock_t(stime);
-		tmp.tms_cutime = cputime_to_clock_t(cutime);
-		tmp.tms_cstime = cputime_to_clock_t(cstime);
+
+		do_sys_times(&tmp);
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
@@ -1449,7 +1439,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
 	struct rlimit new_rlim, *old_rlim;
-	unsigned long it_prof_secs;
 	int retval;
 
 	if (resource >= RLIM_NLIMITS)
@@ -1503,18 +1492,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	if (new_rlim.rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
-	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
-		unsigned long rlim_cur = new_rlim.rlim_cur;
-		cputime_t cputime;
-
-		cputime = secs_to_cputime(rlim_cur);
-		read_lock(&tasklist_lock);
-		spin_lock_irq(&current->sighand->siglock);
-		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-		spin_unlock_irq(&current->sighand->siglock);
-		read_unlock(&tasklist_lock);
-	}
+	update_rlimit_cpu(new_rlim.rlim_cur);
 out:
 	return 0;
 }
@@ -1552,11 +1530,8 @@ out:
  *
  */
 
-static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r,
-				     cputime_t *utimep, cputime_t *stimep)
+static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 {
-	*utimep = cputime_add(*utimep, t->utime);
-	*stimep = cputime_add(*stimep, t->stime);
 	r->ru_nvcsw += t->nvcsw;
 	r->ru_nivcsw += t->nivcsw;
 	r->ru_minflt += t->min_flt;
@@ -1570,12 +1545,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
+	struct task_cputime cputime;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		accumulate_thread_rusage(p, r, &utime, &stime);
+		accumulate_thread_rusage(p, r);
 		goto out;
 	}
 
@@ -1598,8 +1574,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			utime = cputime_add(utime, p->signal->utime);
-			stime = cputime_add(stime, p->signal->stime);
+			thread_group_cputime(p, &cputime);
+			utime = cputime_add(utime, cputime.utime);
+			stime = cputime_add(stime, cputime.stime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
@@ -1608,7 +1585,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_oublock += p->signal->oublock;
 			t = p;
 			do {
-				accumulate_thread_rusage(t, r, &utime, &stime);
+				accumulate_thread_rusage(t, r);
 				t = next_thread(t);
 			} while (t != p);
 			break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a..b3cc73931d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler   = &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "scan_unevictable_pages",
+		.data		= &scan_unevictable_pages,
+		.maxlen		= sizeof(scan_unevictable_pages),
+		.mode		= 0644,
+		.proc_handler	= &scan_unevictable_handler,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 093d4acf993..9ed2eec9752 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c)
 	unsigned long flags;
 	int ret;
 
+	/* save mult_orig on registration */
+	c->mult_orig = c->mult;
+
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4c256fdb887..1ca99557e92 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = {
 	.read		= jiffies_read,
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+	.mult_orig	= NSEC_PER_JIFFY << JIFFIES_SHIFT,
 	.shift		= JIFFIES_SHIFT,
 };
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1ad46f3df6e..1a20715bfd6 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -10,13 +10,13 @@
 
 #include <linux/mm.h>
 #include <linux/time.h>
-#include <linux/timer.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/hrtimer.h>
 #include <linux/capability.h>
 #include <linux/math64.h>
 #include <linux/clocksource.h>
+#include <linux/workqueue.h>
 #include <asm/timex.h>
 
 /*
@@ -218,11 +218,11 @@ void second_overflow(void)
 /* Disable the cmos update - used by virtualization and embedded */
 int no_sync_cmos_clock  __read_mostly;
 
-static void sync_cmos_clock(unsigned long dummy);
+static void sync_cmos_clock(struct work_struct *work);
 
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 
-static void sync_cmos_clock(unsigned long dummy)
+static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec now, next;
 	int fail = 1;
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy)
 		next.tv_sec++;
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
-	mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next));
+	schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
 static void notify_cmos_timer(void)
 {
 	if (!no_sync_cmos_clock)
-		mod_timer(&sync_cmos_timer, jiffies + 1);
+		schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { }
 int do_adjtimex(struct timex *txc)
 {
 	struct timespec ts;
-	long save_adjust, sec;
 	int result;
 
-	/* In order to modify anything, you gotta be super-user! */
-	if (txc->modes && !capable(CAP_SYS_TIME))
-		return -EPERM;
-
-	/* Now we validate the data before disabling interrupts */
-
-	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
+	/* Validate the data before disabling interrupts */
+	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
-		if (txc->modes & ~ADJ_OFFSET_SS_READ)
+		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
 			return -EINVAL;
+		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+		    !capable(CAP_SYS_TIME))
+			return -EPERM;
+	} else {
+		/* In order to modify anything, you gotta be super-user! */
+		 if (txc->modes && !capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		/* if the quartz is off by more than 10% something is VERY wrong! */
+		if (txc->modes & ADJ_TICK &&
+		    (txc->tick <  900000/USER_HZ ||
+		     txc->tick > 1100000/USER_HZ))
+				return -EINVAL;
+
+		if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
+			hrtimer_cancel(&leap_timer);
 	}
 
-	/* if the quartz is off by more than 10% something is VERY wrong ! */
-	if (txc->modes & ADJ_TICK)
-		if (txc->tick <  900000/USER_HZ ||
-		    txc->tick > 1100000/USER_HZ)
-			return -EINVAL;
-
-	if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
-		hrtimer_cancel(&leap_timer);
 	getnstimeofday(&ts);
 
 	write_seqlock_irq(&xtime_lock);
 
-	/* Save for later - semantics of adjtime is to return old value */
-	save_adjust = time_adjust;
-
 	/* If there are input parameters, then process them */
+	if (txc->modes & ADJ_ADJTIME) {
+		long save_adjust = time_adjust;
+
+		if (!(txc->modes & ADJ_OFFSET_READONLY)) {
+			/* adjtime() is independent from ntp_adjtime() */
+			time_adjust = txc->offset;
+			ntp_update_frequency();
+		}
+		txc->offset = save_adjust;
+		goto adj_done;
+	}
 	if (txc->modes) {
+		long sec;
+
 		if (txc->modes & ADJ_STATUS) {
 			if ((time_status & STA_PLL) &&
 			    !(txc->status & STA_PLL)) {
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc)
 		if (txc->modes & ADJ_TAI && txc->constant > 0)
 			time_tai = txc->constant;
 
-		if (txc->modes & ADJ_OFFSET) {
-			if (txc->modes == ADJ_OFFSET_SINGLESHOT)
-				/* adjtime() is independent from ntp_adjtime() */
-				time_adjust = txc->offset;
-			else
-				ntp_update_offset(txc->offset);
-		}
+		if (txc->modes & ADJ_OFFSET)
+			ntp_update_offset(txc->offset);
 		if (txc->modes & ADJ_TICK)
 			tick_usec = txc->tick;
 
@@ -389,22 +396,18 @@ int do_adjtimex(struct timex *txc)
 			ntp_update_frequency();
 	}
 
+	txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
+				  NTP_SCALE_SHIFT);
+	if (!(time_status & STA_NANO))
+		txc->offset /= NSEC_PER_USEC;
+
+adj_done:
 	result = time_state;	/* mostly `TIME_OK' */
 	if (time_status & (STA_UNSYNC|STA_CLOCKERR))
 		result = TIME_ERROR;
 
-	if ((txc->modes == ADJ_OFFSET_SINGLESHOT) ||
-	    (txc->modes == ADJ_OFFSET_SS_READ))
-		txc->offset = save_adjust;
-	else {
-		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
-					  NTP_SCALE_SHIFT);
-		if (!(time_status & STA_NANO))
-			txc->offset /= NSEC_PER_USEC;
-	}
-	txc->freq	   = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
-					 (s64)PPM_SCALE_INV,
-					 NTP_SCALE_SHIFT);
+	txc->freq	   = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
+					 (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
 	txc->maxerror	   = time_maxerror;
 	txc->esterror	   = time_esterror;
 	txc->status	   = time_status;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index cb01cd8f919..f98a1b7b16e 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -384,6 +384,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 }
 
 /*
+ * Called from irq_enter() when idle was interrupted to reenable the
+ * per cpu device.
+ */
+void tick_check_oneshot_broadcast(int cpu)
+{
+	if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+
+		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+	}
+}
+
+/*
  * Handle oneshot mode broadcasting
  */
 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 469248782c2..b1c05bf75ee 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
+extern void tick_check_oneshot_broadcast(int cpu);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline void tick_check_oneshot_broadcast(int cpu) { }
 # endif /* !BROADCAST */
 
 #else /* !ONESHOT */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b711ffcb106..0581c11fe6c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void)
 	touch_softlockup_watchdog();
 }
 
-void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+	hrtimer_cancel(&ts->sched_timer);
+	ts->sched_timer.expires = ts->idle_tick;
+
+	while (1) {
+		/* Forward the timer so that it expires in the future */
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer,
+				      ts->sched_timer.expires,
+				      HRTIMER_MODE_ABS);
+			/* Check whether the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				break;
+		} else {
+			if (!tick_program_event(ts->sched_timer.expires, 0))
+				break;
+		}
+		/* Programming failed: update jiffies and reread time */
+		tick_do_update_jiffies64(now);
+		now = ktime_get();
+	}
+}
+
 /**
  * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
  *
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	ts->tick_stopped  = 0;
 	ts->idle_exittime = now;
-	hrtimer_cancel(&ts->sched_timer);
-	ts->sched_timer.expires = ts->idle_tick;
-
-	while (1) {
-		/* Forward the time to expire in the future */
-		hrtimer_forward(&ts->sched_timer, now, tick_period);
-
-		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-			hrtimer_start(&ts->sched_timer,
-				      ts->sched_timer.expires,
-				      HRTIMER_MODE_ABS);
-			/* Check, if the timer was already in the past */
-			if (hrtimer_active(&ts->sched_timer))
-				break;
-		} else {
-			if (!tick_program_event(ts->sched_timer.expires, 0))
-				break;
-		}
-		/* Update jiffies and reread time */
-		tick_do_update_jiffies64(now);
-		now = ktime_get();
-	}
+	tick_nohz_restart(ts, now);
 	local_irq_enable();
 }
 
@@ -503,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING);
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return;
-
 	while (tick_nohz_reprogram(ts, now)) {
 		now = ktime_get();
 		tick_do_update_jiffies64(now);
@@ -552,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void)
 	       smp_processor_id());
 }
 
+/*
+ * When NOHZ is enabled and the tick is stopped, we need to kick the
+ * tick timer from irq_enter() so that the jiffies update is kept
+ * alive during long running softirqs. That's ugly as hell, but
+ * correctness is key even if we need to fix the offending softirq in
+ * the first place.
+ *
+ * Note, this is different to tick_nohz_restart. We just kick the
+ * timer and do not touch the other magic bits which need to be done
+ * when idle is left.
+ */
+static void tick_nohz_kick_tick(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (!ts->tick_stopped)
+		return;
+
+	tick_nohz_restart(ts, ktime_get());
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
@@ -559,6 +581,19 @@ static inline void tick_nohz_switch_to_nohz(void) { }
 #endif /* NO_HZ */
 
 /*
+ * Called from irq_enter to notify about the possible interruption of idle()
+ */
+void tick_check_idle(int cpu)
+{
+	tick_check_oneshot_broadcast(cpu);
+#ifdef CONFIG_NO_HZ
+	tick_nohz_stop_idle(cpu);
+	tick_nohz_update_jiffies();
+	tick_nohz_kick_tick(cpu);
+#endif
+}
+
+/*
  * High resolution timer specific code
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -611,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 		profile_tick(CPU_PROFILING);
 	}
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return HRTIMER_NORESTART;
-
 	hrtimer_forward(timer, now, tick_period);
 
 	return HRTIMER_RESTART;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c..e7acfb482a6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -58,27 +58,26 @@ struct clocksource *clock;
 
 #ifdef CONFIG_GENERIC_TIME
 /**
- * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ * clocksource_forward_now - update clock to the current time
  *
- * private function, must hold xtime_lock lock when being
- * called. Returns the number of nanoseconds since the
- * last call to update_wall_time() (adjusted by NTP scaling)
+ * Fold the time elapsed since the last update_wall_time() into xtime and
+ * the raw time, keeping both normalized. This is useful before significant
+ * clock changes, as it avoids dealing with this time offset explicitly.
  */
-static inline s64 __get_nsec_offset(void)
+static void clocksource_forward_now(void)
 {
 	cycle_t cycle_now, cycle_delta;
-	s64 ns_offset;
+	s64 nsec;
 
-	/* read clocksource: */
 	cycle_now = clocksource_read(clock);
-
-	/* calculate the delta since the last update_wall_time: */
 	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+	clock->cycle_last = cycle_now;
 
-	/* convert to nanoseconds: */
-	ns_offset = cyc2ns(clock, cycle_delta);
+	nsec = cyc2ns(clock, cycle_delta);
+	timespec_add_ns(&xtime, nsec);
 
-	return ns_offset;
+	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+	timespec_add_ns(&clock->raw_time, nsec);
 }
 
 /**
@@ -89,6 +88,7 @@ static inline s64 __get_nsec_offset(void)
  */
 void getnstimeofday(struct timespec *ts)
 {
+	cycle_t cycle_now, cycle_delta;
 	unsigned long seq;
 	s64 nsecs;
 
@@ -96,7 +96,15 @@ void getnstimeofday(struct timespec *ts)
 		seq = read_seqbegin(&xtime_lock);
 
 		*ts = xtime;
-		nsecs = __get_nsec_offset();
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = cyc2ns(clock, cycle_delta);
 
 	} while (read_seqretry(&xtime_lock, seq));
 
@@ -129,22 +137,22 @@ EXPORT_SYMBOL(do_gettimeofday);
  */
 int do_settimeofday(struct timespec *tv)
 {
+	struct timespec ts_delta;
 	unsigned long flags;
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
-	nsec -= __get_nsec_offset();
+	clocksource_forward_now();
+
+	ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
+	ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
+	wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
 
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+	xtime = *tv;
 
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 	update_xtime_cache(0);
 
 	clock->error = 0;
@@ -170,22 +178,19 @@ EXPORT_SYMBOL(do_settimeofday);
 static void change_clocksource(void)
 {
 	struct clocksource *new;
-	cycle_t now;
-	u64 nsec;
 
 	new = clocksource_get_next();
 
 	if (clock == new)
 		return;
 
-	new->cycle_last = 0;
-	now = clocksource_read(new);
-	nsec =  __get_nsec_offset();
-	timespec_add_ns(&xtime, nsec);
+	clocksource_forward_now();
 
-	clock = new;
-	clock->cycle_last = now;
+	new->raw_time = clock->raw_time;
 
+	clock = new;
+	clock->cycle_last = 0;
+	clock->cycle_last = clocksource_read(new);
 	clock->error = 0;
 	clock->xtime_nsec = 0;
 	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -200,11 +205,44 @@ static void change_clocksource(void)
 	 */
 }
 #else
+static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
-static inline s64 __get_nsec_offset(void) { return 0; }
 #endif
 
 /**
+ * getrawmonotonic - Returns the raw monotonic time in a timespec
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the raw monotonic time (completely un-modified by ntp)
+ */
+void getrawmonotonic(struct timespec *ts)
+{
+	unsigned long seq;
+	s64 nsecs;
+	cycle_t cycle_now, cycle_delta;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+
+		*ts = clock->raw_time;
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	timespec_add_ns(ts, nsecs);
+}
+EXPORT_SYMBOL(getrawmonotonic);
+
+
+/**
  * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  */
 int timekeeping_valid_for_hres(void)
@@ -265,8 +303,6 @@ void __init timekeeping_init(void)
 static int timekeeping_suspended;
 /* time in seconds when suspend began */
 static unsigned long timekeeping_suspend_time;
-/* xtime offset when we went into suspend */
-static s64 timekeeping_suspend_nsecs;
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -292,8 +328,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic.tv_sec -= sleep_length;
 		total_sleep_time += sleep_length;
 	}
-	/* Make sure that we have the correct xtime reference */
-	timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
 	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	clock->cycle_last = 0;
@@ -319,8 +353,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 	timekeeping_suspend_time = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	/* Get the current xtime offset */
-	timekeeping_suspend_nsecs = __get_nsec_offset();
+	clocksource_forward_now();
 	timekeeping_suspended = 1;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -454,23 +487,29 @@ void update_wall_time(void)
 #else
 	offset = clock->cycle_interval;
 #endif
-	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+	clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
 	 */
 	while (offset >= clock->cycle_interval) {
 		/* accumulate one interval */
-		clock->xtime_nsec += clock->xtime_interval;
-		clock->cycle_last += clock->cycle_interval;
 		offset -= clock->cycle_interval;
+		clock->cycle_last += clock->cycle_interval;
 
+		clock->xtime_nsec += clock->xtime_interval;
 		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
 			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
 			xtime.tv_sec++;
 			second_overflow();
 		}
 
+		clock->raw_time.tv_nsec += clock->raw_interval;
+		if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
+			clock->raw_time.tv_nsec -= NSEC_PER_SEC;
+			clock->raw_time.tv_sec++;
+		}
+
 		/* accumulate error between NTP and clock interval */
 		clock->error += tick_length;
 		clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
@@ -479,9 +518,12 @@ void update_wall_time(void)
 	/* correct the clock when NTP error is too big */
 	clocksource_adjust(offset);
 
-	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
+	/* store full nanoseconds into xtime after rounding it up and
+	 * add the remainder to the error difference.
+	 */
+	xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
+	clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
 
 	update_xtime_cache(cyc2ns(clock, offset));
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a40e20fd000..f6426911e35 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym)
 }
 
 static void
-print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
+print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
+	    int idx, u64 now)
 {
 #ifdef CONFIG_TIMER_STATS
 	char tmp[TASK_COMM_LEN + 1];
 #endif
 	SEQ_printf(m, " #%d: ", idx);
-	print_name_offset(m, timer);
+	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
 	SEQ_printf(m, ", S:%02lx", timer->state);
@@ -99,7 +100,7 @@ next_one:
 		tmp = *timer;
 		spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
-		print_timer(m, &tmp, i, now);
+		print_timer(m, timer, &tmp, i, now);
 		next++;
 		goto next_one;
 	}
@@ -109,6 +110,7 @@ next_one:
 static void
 print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
 {
+	SEQ_printf(m, "  .base:       %p\n", base);
 	SEQ_printf(m, "  .index:      %d\n",
 			base->index);
 	SEQ_printf(m, "  .resolution: %Lu nsecs\n",
@@ -183,12 +185,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 static void
-print_tickdevice(struct seq_file *m, struct tick_device *td)
+print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
 	struct clock_event_device *dev = td->evtdev;
 
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
+	if (cpu < 0)
+		SEQ_printf(m, "Broadcast device\n");
+	else
+		SEQ_printf(m, "Per CPU device: %d\n", cpu);
 
 	SEQ_printf(m, "Clock Event Device: ");
 	if (!dev) {
@@ -222,7 +228,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	int cpu;
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	print_tickdevice(m, tick_get_broadcast_device());
+	print_tickdevice(m, tick_get_broadcast_device(), -1);
 	SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
 		   tick_get_broadcast_mask()->bits[0]);
 #ifdef CONFIG_TICK_ONESHOT
@@ -232,7 +238,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	SEQ_printf(m, "\n");
 #endif
 	for_each_online_cpu(cpu)
-		   print_tickdevice(m, tick_get_device(cpu));
+		print_tickdevice(m, tick_get_device(cpu), cpu);
 	SEQ_printf(m, "\n");
 }
 #else
@@ -244,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.3\n");
+	SEQ_printf(m, "Timer List Version: v0.4\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 510fe69351c..56becf373c5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1436,9 +1436,11 @@ static void __cpuinit migrate_timers(int cpu)
 	BUG_ON(cpu_online(cpu));
 	old_base = per_cpu(tvec_bases, cpu);
 	new_base = get_cpu_var(tvec_bases);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, so deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	BUG_ON(old_base->running_timer);
@@ -1453,8 +1455,7 @@ static void __cpuinit migrate_timers(int cpu)
 	}
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(tvec_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd6..1cb3e1f616a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,37 @@
 #
 # Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
 #
+
+config NOP_TRACER
+	bool
+
 config HAVE_FTRACE
 	bool
+	select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
+	select RING_BUFFER
 	select STACKTRACE
+	select TRACEPOINTS
 
 config FTRACE
 	bool "Kernel Function Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -36,6 +50,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -59,6 +74,7 @@ config PREEMPT_TRACER
 	depends on GENERIC_TIME
 	depends on PREEMPT
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -86,6 +102,7 @@ config SYSPROF_TRACER
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -96,16 +113,56 @@ config SCHED_TRACER
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
+
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select FTRACE
+	select STACKTRACE
+	help
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. Because this logic has to execute in every
+	  kernel function, all the time, this option can slow down the
+	  kernel measurably and is generally intended for kernel
+	  developers only.
+
+	  Say N if unsure.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
          This option will modify all the calls to ftrace dynamically
@@ -121,12 +178,17 @@ config DYNAMIC_FTRACE
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de1728..a85dfba88ba 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
@@ -19,6 +20,9 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b40..4dda4f60a2a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	/* Should never be called by interrupts */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	struct ftrace_ops **p;
 	int ret = 0;
 
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	/*
@@ -153,6 +154,30 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the mcount callers are recorded
+ * dynamically, that is, by the callers themselves rather than at
+ * compile time.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+			spin_unlock_irqrestore(&ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
+#define ftrace_hash_unlock(flags) do { } while(0)
+#endif
+
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want the
+ * parsing of the objcopy output of the text to get confused by a
+ * reference to it in the code. Use a variable to hold the address
+ * instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 static struct task_struct *ftraced_task;
 
 enum {
@@ -171,7 +196,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
 
 static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
 
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
 static DEFINE_MUTEX(ftraced_lock);
 static DEFINE_MUTEX(ftrace_regex_lock);
 
@@ -294,13 +318,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-	/* no locking, only called from kstop_machine */
-
 	rec->ip = (unsigned long)ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
 }
 
+void ftrace_release(void *start, unsigned long size)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = s + size;
+	int i;
+
+	if (ftrace_disabled || !start)
+		return;
+
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		for (i = 0; i < pg->index; i++) {
+			rec = &pg->records[i];
+
+			if ((rec->ip >= s) && (rec->ip < e))
+				ftrace_free_rec(rec);
+		}
+	}
+	spin_unlock(&ftrace_lock);
+
+}
+
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -338,7 +386,6 @@ ftrace_record_ip(unsigned long ip)
 	unsigned long flags;
 	unsigned long key;
 	int resched;
-	int atomic;
 	int cpu;
 
 	if (!ftrace_enabled || ftrace_disabled)
@@ -368,9 +415,7 @@ ftrace_record_ip(unsigned long ip)
 	if (ftrace_ip_in_hash(ip, key))
 		goto out;
 
-	atomic = irqs_disabled();
-
-	spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+	ftrace_hash_lock(flags);
 
 	/* This ip may have hit the hash before the lock */
 	if (ftrace_ip_in_hash(ip, key))
@@ -387,7 +432,7 @@ ftrace_record_ip(unsigned long ip)
 	ftraced_trigger = 1;
 
  out_unlock:
-	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+	ftrace_hash_unlock(flags);
  out:
 	per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
 
@@ -531,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
 	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+	int i;
+
+	printk(KERN_CONT "%s", fmt);
+
+	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
 static int
 ftrace_code_disable(struct dyn_ftrace *rec)
 {
@@ -541,10 +596,27 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 	ip = rec->ip;
 
 	nop = ftrace_nop_replace();
-	call = ftrace_call_replace(ip, MCOUNT_ADDR);
+	call = ftrace_call_replace(ip, mcount_addr);
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
+		switch (failed) {
+		case 1:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on modifying ");
+			print_ip_sym(ip);
+			break;
+		case 2:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace failed to modify ");
+			print_ip_sym(ip);
+			print_ip_ins(" expected: ", call);
+			print_ip_ins(" actual: ", (unsigned char *)ip);
+			print_ip_ins(" replace: ", nop);
+			printk(KERN_CONT "\n");
+			break;
+		}
+
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
@@ -792,47 +864,7 @@ static int ftrace_update_code(void)
 	return 1;
 }
 
-static int ftraced(void *ignore)
-{
-	unsigned long usecs;
-
-	while (!kthread_should_stop()) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		/* check once a second */
-		schedule_timeout(HZ);
-
-		if (unlikely(ftrace_disabled))
-			continue;
-
-		mutex_lock(&ftrace_sysctl_lock);
-		mutex_lock(&ftraced_lock);
-		if (!ftraced_suspend && !ftraced_stop &&
-		    ftrace_update_code()) {
-			usecs = nsecs_to_usecs(ftrace_update_time);
-			if (ftrace_update_tot_cnt > 100000) {
-				ftrace_update_tot_cnt = 0;
-				pr_info("hm, dftrace overflow: %lu change%s"
-					" (%lu total) in %lu usec%s\n",
-					ftrace_update_cnt,
-					ftrace_update_cnt != 1 ? "s" : "",
-					ftrace_update_tot_cnt,
-					usecs, usecs != 1 ? "s" : "");
-				ftrace_disabled = 1;
-				WARN_ON_ONCE(1);
-			}
-		}
-		mutex_unlock(&ftraced_lock);
-		mutex_unlock(&ftrace_sysctl_lock);
-
-		ftrace_shutdown_replenish();
-	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 {
 	struct ftrace_page *pg;
 	int cnt;
@@ -859,7 +891,9 @@ static int __init ftrace_dyn_table_alloc(void)
 
 	pg = ftrace_pages = ftrace_pages_start;
 
-	cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+	cnt = num_to_init / ENTRIES_PER_PAGE;
+	pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+		num_to_init, cnt);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +935,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
  retry:
 	if (iter->idx >= iter->pg->index) {
 		if (iter->pg->next) {
@@ -910,15 +946,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+		if ((rec->flags & FTRACE_FL_FREE) ||
+
+		    (!(iter->flags & FTRACE_ITER_FAILURES) &&
 		     (rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
-		     (!(rec->flags & FTRACE_FL_FAILED) ||
-		      (rec->flags & FTRACE_FL_FREE))) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(rec->flags & FTRACE_FL_FILTER)) ||
+		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
@@ -926,6 +960,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			goto retry;
 		}
 	}
+	spin_unlock(&ftrace_lock);
 
 	iter->pos = *pos;
 
@@ -1039,8 +1074,8 @@ static void ftrace_filter_reset(int enable)
 	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 	unsigned i;
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 0;
 	pg = ftrace_pages_start;
@@ -1053,7 +1088,7 @@ static void ftrace_filter_reset(int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static int
@@ -1165,8 +1200,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 	}
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 1;
 	pg = ftrace_pages_start;
@@ -1203,7 +1238,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static ssize_t
@@ -1556,6 +1591,114 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+static int ftrace_convert_nops(unsigned long *start,
+			       unsigned long *end)
+{
+	unsigned long *p;
+	unsigned long addr;
+	unsigned long flags;
+
+	p = start;
+	while (p < end) {
+		addr = ftrace_call_adjust(*p++);
+		/* should not be called from interrupt context */
+		spin_lock(&ftrace_lock);
+		ftrace_record_ip(addr);
+		spin_unlock(&ftrace_lock);
+		ftrace_shutdown_replenish();
+	}
+
+	/* p is ignored */
+	local_irq_save(flags);
+	__ftrace_update_code(p);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+	if (ftrace_disabled || start == end)
+		return;
+	ftrace_convert_nops(start, end);
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+	unsigned long count, addr, flags;
+	int ret;
+
+	/* Keep the ftrace pointer to the stub */
+	addr = (unsigned long)ftrace_stub;
+
+	local_irq_save(flags);
+	ftrace_dyn_arch_init(&addr);
+	local_irq_restore(flags);
+
+	/* ftrace_dyn_arch_init places the return code in addr */
+	if (addr)
+		goto failed;
+
+	count = __stop_mcount_loc - __start_mcount_loc;
+
+	ret = ftrace_dyn_table_alloc(count);
+	if (ret)
+		goto failed;
+
+	last_ftrace_enabled = ftrace_enabled = 1;
+
+	ret = ftrace_convert_nops(__start_mcount_loc,
+				  __stop_mcount_loc);
+
+	return;
+ failed:
+	ftrace_disabled = 1;
+}
+#else /* CONFIG_FTRACE_MCOUNT_RECORD */
+static int ftraced(void *ignore)
+{
+	unsigned long usecs;
+
+	while (!kthread_should_stop()) {
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* check once a second */
+		schedule_timeout(HZ);
+
+		if (unlikely(ftrace_disabled))
+			continue;
+
+		mutex_lock(&ftrace_sysctl_lock);
+		mutex_lock(&ftraced_lock);
+		if (!ftraced_suspend && !ftraced_stop &&
+		    ftrace_update_code()) {
+			usecs = nsecs_to_usecs(ftrace_update_time);
+			if (ftrace_update_tot_cnt > 100000) {
+				ftrace_update_tot_cnt = 0;
+				pr_info("hm, dftrace overflow: %lu change%s"
+					" (%lu total) in %lu usec%s\n",
+					ftrace_update_cnt,
+					ftrace_update_cnt != 1 ? "s" : "",
+					ftrace_update_tot_cnt,
+					usecs, usecs != 1 ? "s" : "");
+				ftrace_disabled = 1;
+				WARN_ON_ONCE(1);
+			}
+		}
+		mutex_unlock(&ftraced_lock);
+		mutex_unlock(&ftrace_sysctl_lock);
+
+		ftrace_shutdown_replenish();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
 static int __init ftrace_dynamic_init(void)
 {
 	struct task_struct *p;
@@ -1572,7 +1715,7 @@ static int __init ftrace_dynamic_init(void)
 		goto failed;
 	}
 
-	ret = ftrace_dyn_table_alloc();
+	ret = ftrace_dyn_table_alloc(NR_TO_INIT);
 	if (ret)
 		goto failed;
 
@@ -1593,6 +1736,8 @@ static int __init ftrace_dynamic_init(void)
 }
 
 core_initcall(ftrace_dynamic_init);
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
 #else
 # define ftrace_startup()		do { } while (0)
 # define ftrace_shutdown()		do { } while (0)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 00000000000..94af1fe56bb
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2014 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>	/* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! Returns sched_clock() scaled up by DEBUG_SHIFT; @cpu is unused. */
+u64 ring_buffer_time_stamp(int cpu)
+{
+	u64 now = sched_clock();	/* raw clock before the debug shift */
+	return now << DEBUG_SHIFT;	/* shift to test normalization */
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+	/* Scale *ts back down by DEBUG_SHIFT (testing aid); @cpu is unused */
+	*ts = *ts >> DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT	2	/* log2 of RB_ALIGNMENT */
+#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)	/* event sizes are rounded up to this */
+#define RB_MAX_SMALL_DATA	28	/* above this, the length is kept in array[0], not in len */
+/* Fixed total sizes, in bytes, reported for the two time-keeping event types */
+enum {
+	RB_LEN_TIME_EXTEND = 8,
+	RB_LEN_TIME_STAMP = 16,
+};
+
+/*
+ * Total size in bytes an event occupies in the buffer, header included.
+ * Kept inline because it sits on the ring buffer fast paths.
+ */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+	unsigned payload;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_TIME_EXTEND:
+		return RB_LEN_TIME_EXTEND;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		return RB_LEN_TIME_STAMP;
+
+	case RINGBUF_TYPE_DATA:
+		/* a zero len field means the real length lives in array[0] */
+		payload = event->len ? event->len << RB_ALIGNMENT_SHIFT
+				     : event->array[0];
+		return payload + RB_EVNT_HDR_SIZE;
+
+	case RINGBUF_TYPE_PADDING:
+		return -1;	/* undefined for padding */
+	default:
+		BUG();
+	}
+	return 0;	/* not hit */
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event (as computed by rb_event_length())
+ * @event: the event to get the length of (the length of a padding event is undefined)
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+	return rb_event_length(event);
+}
+
+/* Payload pointer of a data event; inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	/*
+	 * A non-zero len field means the payload starts at array[0];
+	 * otherwise array[0] holds the length and the payload follows it.
+	 */
+	return (void *)&event->array[event->len ? 0 : 1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from (must be RINGBUF_TYPE_DATA; rb_event_data() BUGs otherwise)
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+	return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu)	/* walk the CPUs in buffer->cpumask */ \
+	for_each_cpu_mask(cpu, (buffer)->cpumask)
+
+#define TS_SHIFT	27	/* number of bits a time delta may occupy in an event */
+#define TS_MASK		((1ULL << TS_SHIFT) - 1)	/* the low TS_SHIFT bits */
+#define TS_DELTA_TEST	(~TS_MASK)	/* bits that are set only when a delta does not fit */
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+	u64		 time_stamp;	/* page time stamp */
+	local_t		 write;		/* index for next write */
+	local_t		 commit;	/* write committed index */
+	unsigned	 read;		/* index for next read */
+	struct list_head list;		/* links the page into a page list */
+	void *page;			/* Actual data page (from __get_free_page()) */
+};
+
+/*
+ * Free a buffer_page descriptor and the data page it owns. The data page
+ * is an address from __get_free_page(), so it is released by free_page().
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+	if (bpage->page)
+		free_page((unsigned long)bpage->page);
+	kfree(bpage);
+}
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ * Returns 1 when @delta has a bit set above the low TS_SHIFT bits (too
+ * big to store in an event), 0 when it fits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+	return (delta & TS_DELTA_TEST) ? 1 : 0;
+}
+
+#define BUF_PAGE_SIZE PAGE_SIZE	/* bytes of event space per buffer data page */
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+	int				cpu;	/* CPU this buffer was allocated for */
+	struct ring_buffer		*buffer;	/* owning ring_buffer */
+	spinlock_t			lock;	/* taken when a write crosses a page */
+	struct lock_class_key		lock_key;	/* NOTE(review): unused in the code visible here */
+	struct list_head		pages;	/* head of the list of buffer pages */
+	struct buffer_page		*head_page;	/* read from head */
+	struct buffer_page		*tail_page;	/* write to tail */
+	struct buffer_page		*commit_page;	/* committed pages */
+	struct buffer_page		*reader_page;	/* separately allocated page, not on ->pages */
+	unsigned long			overrun;	/* data events discarded by overwrite */
+	unsigned long			entries;	/* data events committed and not overrun */
+	u64				write_stamp;	/* time stamp of the last commit */
+	u64				read_stamp;	/* time stamp base used when reading */
+	atomic_t			record_disabled;	/* non-zero: writes are refused */
+};
+
+struct ring_buffer {
+	unsigned long			size;	/* NOTE(review): never assigned in the code visible here */
+	unsigned			pages;	/* data pages per CPU buffer */
+	unsigned			flags;	/* RB_FL_* attributes given at allocation */
+	int				cpus;	/* set to nr_cpu_ids at allocation */
+	cpumask_t			cpumask;	/* CPUs that own a per-CPU buffer */
+	atomic_t			record_disabled;	/* non-zero: all writes are refused */
+
+	struct mutex			mutex;	/* held across ring_buffer_resize() */
+
+	struct ring_buffer_per_cpu	**buffers;	/* per-CPU buffers, indexed by CPU number */
+};
+
+struct ring_buffer_iter {
+	struct ring_buffer_per_cpu	*cpu_buffer;	/* per-CPU buffer being walked */
+	unsigned long			head;	/* offset of the current event within head_page */
+	struct buffer_page		*head_page;	/* page the iterator is currently on */
+	u64				read_stamp;	/* time stamp base, reloaded on each page change */
+};
+
+#define RB_WARN_ON(buffer, cond)	/* warn and disable recording */	\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&(buffer)->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+/* As RB_WARN_ON(), but also makes the calling function return -1 */
+#define RB_WARN_ON_RET(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&(buffer)->record_disabled);	\
+			WARN_ON(1);				\
+			return -1;				\
+		}						\
+	} while (0)
+/* As RB_WARN_ON(), but triggers at most once per expansion site */
+#define RB_WARN_ON_ONCE(buffer, cond)				\
+	do {							\
+		static int once;				\
+		if (unlikely(cond) && !once) {			\
+			once++;					\
+			atomic_inc(&(buffer)->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+
+/**
+ * rb_check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * Verifies that each list node's neighbours point back at it. Returns 0,
+ * or -1 (after warning and disabling recording) at the first broken link.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *pages = &cpu_buffer->pages;
+	struct buffer_page *bpage, *next;
+
+	RB_WARN_ON_RET(cpu_buffer, pages->next->prev != pages);
+	RB_WARN_ON_RET(cpu_buffer, pages->prev->next != pages);
+
+	list_for_each_entry_safe(bpage, next, pages, list) {
+		struct list_head *node = &bpage->list;
+
+		RB_WARN_ON_RET(cpu_buffer, node->next->prev != node);
+		RB_WARN_ON_RET(cpu_buffer, node->prev->next != node);
+	}
+
+	return 0;
+}
+
+/* Allocate @nr_pages buffer pages and splice them onto cpu_buffer->pages. */
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	size_t desc_size = ALIGN(sizeof(struct buffer_page), cache_line_size());
+	int node = cpu_to_node(cpu_buffer->cpu);
+	struct buffer_page *bpage, *next;
+	LIST_HEAD(fresh);
+	unsigned n;
+
+	for (n = 0; n < nr_pages; n++) {
+		bpage = kzalloc_node(desc_size, GFP_KERNEL, node);
+		if (!bpage)
+			goto free_pages;
+		/* queue it first so the error path frees it too */
+		list_add(&bpage->list, &fresh);
+
+		bpage->page = (void *)__get_free_page(GFP_KERNEL);
+		if (!bpage->page)
+			goto free_pages;
+	}
+
+	list_splice(&fresh, &cpu_buffer->pages);
+	rb_check_pages(cpu_buffer);
+
+	return 0;
+
+ free_pages:
+	/* all or nothing: drop whatever was allocated so far */
+	list_for_each_entry_safe(bpage, next, &fresh, list) {
+		list_del_init(&bpage->list);
+		free_buffer_page(bpage);
+	}
+	return -ENOMEM;
+}
+
+/* Build the per-CPU buffer for @cpu: descriptor, reader page, page list. */
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *reader, *first;
+	int node = cpu_to_node(cpu);
+
+	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+				  GFP_KERNEL, node);
+	if (!cpu_buffer)
+		return NULL;
+
+	cpu_buffer->cpu = cpu;
+	cpu_buffer->buffer = buffer;
+	spin_lock_init(&cpu_buffer->lock);
+	INIT_LIST_HEAD(&cpu_buffer->pages);
+
+	/* the reader page is allocated apart from the pages list */
+	reader = kzalloc_node(ALIGN(sizeof(*reader), cache_line_size()),
+			      GFP_KERNEL, node);
+	if (!reader)
+		goto fail_free_buffer;
+	cpu_buffer->reader_page = reader;
+
+	reader->page = (void *)__get_free_page(GFP_KERNEL);
+	if (!reader->page)
+		goto fail_free_reader;
+	INIT_LIST_HEAD(&reader->list);
+
+	if (rb_allocate_pages(cpu_buffer, buffer->pages) < 0)
+		goto fail_free_reader;
+
+	/* head, tail and commit all start out on the first page */
+	first = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->head_page = first;
+	cpu_buffer->tail_page = first;
+	cpu_buffer->commit_page = first;
+
+	return cpu_buffer;
+
+ fail_free_reader:
+	free_buffer_page(reader);
+
+ fail_free_buffer:
+	kfree(cpu_buffer);
+	return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *ring = &cpu_buffer->pages;
+	struct buffer_page *bpage = cpu_buffer->reader_page;
+
+	list_del_init(&bpage->list);	/* the reader page goes first */
+	free_buffer_page(bpage);
+	while (!list_empty(ring)) {	/* then drain the pages list front to back */
+		bpage = list_entry(ring->next, struct buffer_page, list);
+		list_del_init(&bpage->list);
+		free_buffer_page(bpage);
+	}
+	kfree(cpu_buffer);
+}
+
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ * Returns the new buffer, or NULL if any allocation failed.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+	struct ring_buffer *buffer;
+	int bsize;
+	int cpu;
+
+	/* Paranoid! Optimizes out when all is well */
+	if (sizeof(struct buffer_page) > sizeof(struct page))
+		ring_buffer_page_too_big();
+
+	/* keep it in its own cache line */
+	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+			 GFP_KERNEL);
+	if (!buffer)
+		return NULL;
+
+	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	buffer->flags = flags;
+
+	/* need at least two pages; a @size of 0 would otherwise give none */
+	if (buffer->pages < 2)
+		buffer->pages = 2;
+
+	buffer->cpumask = cpu_possible_map;
+	buffer->cpus = nr_cpu_ids;
+
+	bsize = sizeof(void *) * nr_cpu_ids;
+	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+				  GFP_KERNEL);
+	if (!buffer->buffers)
+		goto fail_free_buffer;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		buffer->buffers[cpu] =
+			rb_allocate_cpu_buffer(buffer, cpu);
+		if (!buffer->buffers[cpu])
+			goto fail_free_buffers;
+	}
+
+	mutex_init(&buffer->mutex);
+
+	return buffer;
+
+ fail_free_buffers:
+	for_each_buffer_cpu(buffer, cpu) {
+		if (buffer->buffers[cpu])
+			rb_free_cpu_buffer(buffer->buffers[cpu]);
+	}
+	kfree(buffer->buffers);
+
+ fail_free_buffer:
+	kfree(buffer);
+	return NULL;
+}
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void ring_buffer_free(struct ring_buffer *buffer)
+{
+	int cpu;
+
+	for_each_buffer_cpu(buffer, cpu)
+		rb_free_cpu_buffer(buffer->buffers[cpu]);
+	/* the per-CPU pointer array is a separate allocation */
+	kfree(buffer->buffers);
+	kfree(buffer);
+}
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+/* Drop @nr_pages pages from the front of the page list, writes disabled. */
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+	struct list_head *ring = &cpu_buffer->pages;
+	struct buffer_page *victim;
+
+	/* refuse new writes, then let synchronize_sched() run its course */
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	while (nr_pages--) {
+		BUG_ON(list_empty(ring));
+		victim = list_entry(ring->next, struct buffer_page, list);
+		list_del_init(&victim->list);
+		free_buffer_page(victim);
+	}
+	/* at least one page has to survive */
+	BUG_ON(list_empty(ring));
+
+	/* reset the per-CPU state, then sanity check the shrunken list */
+	rb_reset_cpu(cpu_buffer);
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/* Move @nr_pages pages from @pages to the tail of the page list. */
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+		struct list_head *pages, unsigned nr_pages)
+{
+	struct list_head *node;
+
+	/* refuse new writes, then let synchronize_sched() run its course */
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	while (nr_pages--) {
+		BUG_ON(list_empty(pages));
+		node = pages->next;
+		list_del_init(node);
+		list_add_tail(node, &cpu_buffer->pages);
+	}
+
+	/* reset the per-CPU state, then sanity check the grown list */
+	rb_reset_cpu(cpu_buffer);
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ *  RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns the new size (rounded up to whole pages), or -ENOMEM on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned nr_pages, rm_pages, new_pages;
+	struct buffer_page *page, *tmp;
+	unsigned long buffer_size;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	int i, cpu;
+
+	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	size *= BUF_PAGE_SIZE;
+	buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+	/* we need a minimum of two pages */
+	if (size < BUF_PAGE_SIZE * 2)
+		size = BUF_PAGE_SIZE * 2;
+
+	if (size == buffer_size)
+		return size;
+
+	mutex_lock(&buffer->mutex);
+
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+	if (size < buffer_size) {
+		/* easy case, just free pages */
+		BUG_ON(nr_pages >= buffer->pages);
+
+		rm_pages = buffer->pages - nr_pages;
+
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			rb_remove_pages(cpu_buffer, rm_pages);
+		}
+		goto out;
+	}
+
+	/*
+	 * This is a bit more difficult. We only want to add pages
+	 * when we can allocate enough for all CPUs. We do this
+	 * by allocating all the pages and storing them on a local
+	 * link list. If we succeed in our allocation, then we
+	 * add these pages to the cpu_buffers. Otherwise we just free
+	 * them all and return -ENOMEM;
+	 */
+	BUG_ON(nr_pages <= buffer->pages);
+	new_pages = nr_pages - buffer->pages;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		for (i = 0; i < new_pages; i++) {
+			page = kzalloc_node(ALIGN(sizeof(*page),
+						  cache_line_size()),
+					    GFP_KERNEL, cpu_to_node(cpu));
+			if (!page)
+				goto free_pages;
+			list_add(&page->list, &pages);
+			addr = __get_free_page(GFP_KERNEL);
+			if (!addr)
+				goto free_pages;
+			page->page = (void *)addr;
+		}
+	}
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		rb_insert_pages(cpu_buffer, &pages, new_pages);
+	}
+
+	BUG_ON(!list_empty(&pages));
+
+ out:
+	buffer->pages = nr_pages;
+	mutex_unlock(&buffer->mutex);
+
+	return size;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		free_buffer_page(page);
+	}
+	mutex_unlock(&buffer->mutex);	/* taken above; must not stay held on failure */
+	return -ENOMEM;
+}
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+	return event->type == RINGBUF_TYPE_PADDING;	/* non-zero for a padding event */
+}
+
+static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+{
+	return page->page + index;	/* address @index bytes into the data page */
+}
+
+static inline struct ring_buffer_event *
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return __rb_page_index(cpu_buffer->reader_page,
+			       cpu_buffer->reader_page->read);	/* event at the reader page's read index */
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return __rb_page_index(cpu_buffer->head_page,
+			       cpu_buffer->head_page->read);	/* event at the head page's read index */
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+	return __rb_page_index(iter->head_page, iter->head);	/* event the iterator currently points at */
+}
+
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+	return local_read(&bpage->write);	/* index for the next write on @bpage */
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+	return local_read(&bpage->commit);	/* index up to which @bpage is committed */
+}
+
+/* Size is determined by what has been committed */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+	return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->commit_page);	/* commit index of the current commit page */
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->head_page);	/* committed size of the head page */
+}
+
+/*
+ * Called in overwrite mode just before the head page is given up to the
+ * writer: every data event still on that page is about to be lost, so
+ * move each one from the entries count to the overrun count.
+ * Non-data events are stepped over without being counted.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *ev;
+	unsigned long offset = 0;
+
+	while (offset < rb_head_size(cpu_buffer)) {
+		ev = __rb_page_index(cpu_buffer->head_page, offset);
+		BUG_ON(rb_null_event(ev));
+
+		if (ev->type == RINGBUF_TYPE_DATA) {
+			cpu_buffer->overrun++;
+			cpu_buffer->entries--;
+		}
+		/* advance by the full on-page size of this event */
+		offset += rb_event_length(ev);
+	}
+}
+
+/* Advance *@page to the next page of the list, wrapping past the list head. */
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+			       struct buffer_page **page)
+{
+	struct list_head *next = (*page)->list.next;
+
+	/* the list head itself is not a page, so step over it */
+	*page = list_entry(next == &cpu_buffer->pages ? next->next : next,
+			   struct buffer_page, list);
+}
+
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
+{
+	unsigned long in_page = (unsigned long)event & ~PAGE_MASK;
+
+	return in_page - (PAGE_SIZE - BUF_PAGE_SIZE);	/* offset of @event in its page */
+}
+
+/* True when @event sits exactly at the current commit position. */
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+	     struct ring_buffer_event *event)
+{
+	void *event_page = (void *)((unsigned long)event & PAGE_MASK);
+	unsigned long offset = rb_event_index(event);
+
+	if (cpu_buffer->commit_page->page != event_page)
+		return 0;
+
+	return rb_commit_index(cpu_buffer) == offset;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+		    struct ring_buffer_event *event)
+{
+	unsigned long addr = (unsigned long)event;
+	unsigned long index;
+
+	index = rb_event_index(event);	/* offset of @event within its page */
+	addr &= PAGE_MASK;		/* address of the page holding @event */
+
+	while (cpu_buffer->commit_page->page != (void *)addr) {	/* walk the commit page up to @event's page */
+		RB_WARN_ON(cpu_buffer,
+			   cpu_buffer->commit_page == cpu_buffer->tail_page);
+		cpu_buffer->commit_page->commit =	/* mark everything written on this page as committed */
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+	}
+
+	/* Now set the commit to the event's index */
+	local_set(&cpu_buffer->commit_page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	/*
+	 * We only race with interrupts and NMIs on this CPU.
+	 * If we own the commit event, then we can commit
+	 * all others that interrupted us, since the interruptions
+	 * are in stack format (they finish before they come
+	 * back to us). This allows us to do a simple loop to
+	 * assign the commit to the tail.
+	 */
+	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {	/* commit whole pages up to the tail page */
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+		/* add barrier to keep gcc from optimizing too much */
+		barrier();
+	}
+	while (rb_commit_index(cpu_buffer) !=
+	       rb_page_write(cpu_buffer->commit_page)) {	/* then catch commit up with write on that page */
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		barrier();
+	}
+}
+
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;	/* restart the read time base from the page */
+	cpu_buffer->reader_page->read = 0;	/* and read from the start of the page */
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/*
+	 * The iterator could be on the reader page (it starts there).
+	 * But the head could have moved, since the reader was
+	 * found. Check for this case and assign the iterator
+	 * to the head page instead of next.
+	 */
+	if (iter->head_page == cpu_buffer->reader_page)
+		iter->head_page = cpu_buffer->head_page;
+	else
+		rb_inc_page(cpu_buffer, &iter->head_page);
+
+	iter->read_stamp = iter->head_page->time_stamp;	/* time base comes from the new page */
+	iter->head = 0;	/* start at the beginning of the new page */
+}
+
+/**
+ * rb_update_event - update event type and data
+ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+			 unsigned type, unsigned length)
+{
+	event->type = type;
+
+	switch (type) {
+
+	case RINGBUF_TYPE_PADDING:
+		break;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		event->len =
+			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		event->len =
+			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_DATA:
+		length -= RB_EVNT_HDR_SIZE;	/* from here on, payload size only */
+		if (length > RB_MAX_SMALL_DATA) {
+			event->len = 0;	/* zero len: the length is kept in array[0] */
+			event->array[0] = length;
+		} else
+			event->len =	/* length in RB_ALIGNMENT units, rounded up */
+				(length + (RB_ALIGNMENT-1))
+				>> RB_ALIGNMENT_SHIFT;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/* Bytes an event carrying @length bytes of data takes up in the buffer. */
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+	struct ring_buffer_event event; /* Used only for sizeof array */
+	unsigned total;
+
+	/* zero length can cause confusions, so treat it as one byte */
+	total = length ? length : 1;
+	/* big payloads keep their length in an extra array[0] slot */
+	if (total > RB_MAX_SMALL_DATA)
+		total += sizeof(event.array[0]);
+
+	/* add the header, then round up to the event alignment */
+	total += RB_EVNT_HDR_SIZE;
+	return ALIGN(total, RB_ALIGNMENT);
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+		  unsigned type, unsigned long length, u64 *ts)
+{
+	struct buffer_page *tail_page, *head_page, *reader_page;
+	unsigned long tail, write;
+	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct ring_buffer_event *event;
+	unsigned long flags;
+
+	tail_page = cpu_buffer->tail_page;
+	write = local_add_return(length, &tail_page->write);	/* claim @length bytes on the tail page */
+	tail = write - length;	/* index where our event would start */
+
+	/* See if we shot past the end of this buffer page */
+	if (write > BUF_PAGE_SIZE) {
+		struct buffer_page *next_page = tail_page;
+
+		spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+		rb_inc_page(cpu_buffer, &next_page);
+
+		head_page = cpu_buffer->head_page;
+		reader_page = cpu_buffer->reader_page;
+
+		/* we grabbed the lock before incrementing */
+		RB_WARN_ON(cpu_buffer, next_page == reader_page);
+
+		/*
+		 * If for some reason, we had an interrupt storm that made
+		 * it all the way around the buffer, bail, and warn
+		 * about it.
+		 */
+		if (unlikely(next_page == cpu_buffer->commit_page)) {
+			WARN_ON_ONCE(1);
+			goto out_unlock;
+		}
+
+		if (next_page == head_page) {
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				/* reset write */
+				if (tail <= BUF_PAGE_SIZE)
+					local_set(&tail_page->write, tail);
+				goto out_unlock;
+			}
+
+			/* tail_page has not moved yet? */
+			if (tail_page == cpu_buffer->tail_page) {
+				/* count overflows */
+				rb_update_overflow(cpu_buffer);
+
+				rb_inc_page(cpu_buffer, &head_page);
+				cpu_buffer->head_page = head_page;
+				cpu_buffer->head_page->read = 0;
+			}
+		}
+
+		/*
+		 * If the tail page is still the same as what we think
+		 * it is, then it is up to us to update the tail
+		 * pointer.
+		 */
+		if (tail_page == cpu_buffer->tail_page) {
+			local_set(&next_page->write, 0);
+			local_set(&next_page->commit, 0);
+			cpu_buffer->tail_page = next_page;
+
+			/* reread the time stamp */
+			*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+			cpu_buffer->tail_page->time_stamp = *ts;
+		}
+
+		/*
+		 * The actual tail page has moved forward.
+		 */
+		if (tail < BUF_PAGE_SIZE) {
+			/* Mark the rest of the page with padding */
+			event = __rb_page_index(tail_page, tail);
+			event->type = RINGBUF_TYPE_PADDING;
+		}
+
+		if (tail <= BUF_PAGE_SIZE)
+			/* Set the write back to the previous setting */
+			local_set(&tail_page->write, tail);
+
+		/*
+		 * If this was a commit entry that failed,
+		 * increment that too
+		 */
+		if (tail_page == cpu_buffer->commit_page &&
+		    tail == rb_commit_index(cpu_buffer)) {
+			rb_set_commit_to_write(cpu_buffer);
+		}
+
+		spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+		/* fail and let the caller try again */
+		return ERR_PTR(-EAGAIN);
+	}
+
+	/* We reserved something on the buffer */
+
+	BUG_ON(write > BUF_PAGE_SIZE);
+
+	event = __rb_page_index(tail_page, tail);
+	rb_update_event(event, type, length);
+
+	/*
+	 * If this is a commit and the tail is zero, then update
+	 * this page's time stamp.
+	 */
+	if (!tail && rb_is_commit(cpu_buffer, event))
+		cpu_buffer->commit_page->time_stamp = *ts;
+
+	return event;
+
+ out_unlock:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+	return NULL;	/* next page is the commit page, or is the head page and overwrite is off */
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		  u64 *ts, u64 *delta)
+{
+	struct ring_buffer_event *event;
+	static int once;
+	int ret;
+
+	if (unlikely(*delta > (1ULL << 59) && !once++)) {
+		printk(KERN_WARNING "Delta way too big! %llu"
+		       " ts=%llu write stamp = %llu\n",
+		       *delta, *ts, cpu_buffer->write_stamp);
+		WARN_ON(1);
+	}
+
+	/*
+	 * The delta is too big, we need to add a
+	 * new timestamp.
+	 */
+	event = __rb_reserve_next(cpu_buffer,
+				  RINGBUF_TYPE_TIME_EXTEND,
+				  RB_LEN_TIME_EXTEND,
+				  ts);
+	if (!event)
+		return -EBUSY;	/* nothing could be reserved */
+
+	if (PTR_ERR(event) == -EAGAIN)
+		return -EAGAIN;	/* the reservation asked to be retried */
+
+	/* Only a committed time event can update the write stamp */
+	if (rb_is_commit(cpu_buffer, event)) {
+		/*
+		 * If this is the first on the page, then we need to
+		 * update the page itself, and just put in a zero.
+		 */
+		if (rb_event_index(event)) {
+			event->time_delta = *delta & TS_MASK;	/* low TS_SHIFT bits of the delta */
+			event->array[0] = *delta >> TS_SHIFT;	/* remaining high bits */
+		} else {
+			cpu_buffer->commit_page->time_stamp = *ts;
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
+		cpu_buffer->write_stamp = *ts;
+		/* let the caller know this was the commit */
+		ret = 1;
+	} else {
+		/* Darn, this is just wasted space */
+		event->time_delta = 0;
+		event->array[0] = 0;
+		ret = 0;
+	}
+
+	*delta = 0;	/* the caller's own event now carries no delta */
+
+	return ret;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+		      unsigned type, unsigned long length)
+{
+	struct ring_buffer_event *event;
+	u64 ts, delta;
+	int commit = 0;	/* becomes 1 when a time extend we added was the commit */
+
+ again:
+	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+	/*
+	 * Only the first commit can update the timestamp.
+	 * Yes there is a race here. If an interrupt comes in
+	 * just after the conditional and it traces too, then it
+	 * will also check the deltas. More than one timestamp may
+	 * also be made. But only the entry that did the actual
+	 * commit will be something other than zero.
+	 */
+	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+	    rb_page_write(cpu_buffer->tail_page) ==
+	    rb_commit_index(cpu_buffer)) {
+
+		delta = ts - cpu_buffer->write_stamp;
+
+		/* make sure this delta is calculated here */
+		barrier();
+
+		/* Did the write stamp get updated already? */
+		if (unlikely(ts < cpu_buffer->write_stamp))
+			goto again;
+
+		if (test_time_stamp(delta)) {
+
+			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+			if (commit == -EBUSY)
+				return NULL;
+
+			if (commit == -EAGAIN)
+				goto again;
+
+			RB_WARN_ON(cpu_buffer, commit < 0);
+		}
+	} else
+		/* Non commits have zero deltas */
+		delta = 0;
+
+	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+	if (PTR_ERR(event) == -EAGAIN)	/* the reservation asked to be retried */
+		goto again;
+
+	if (!event) {
+		if (unlikely(commit))
+			/*
+			 * Ouch! We needed a timestamp and it was committed. But
+			 * we didn't get our event reserved.
+			 */
+			rb_set_commit_to_write(cpu_buffer);
+		return NULL;
+	}
+
+	/*
+	 * If the timestamp was committed, make the commit our entry
+	 * now so that we will update it when needed.
+	 */
+	if (commit)
+		rb_set_commit_event(cpu_buffer, event);
+	else if (!rb_is_commit(cpu_buffer, event))
+		delta = 0;
+
+	event->time_delta = delta;
+
+	return event;
+}
+
+static DEFINE_PER_CPU(int, rb_need_resched);
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags (not written by this code)
+ *
+ * Returns a reserved event on the ring buffer to copy directly to.
+ * The user of this interface will need to get the body to write into
+ * and can use the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	int cpu, resched;
+
+	if (atomic_read(&buffer->record_disabled))
+		return NULL;
+
+	/* If we are tracing schedule, we don't want to recurse */
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto out;
+
+	length = rb_calculate_event_length(length);
+	if (length > BUF_PAGE_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	if (!event)
+		goto out;
+
+	/*
+	 * Need to store resched state on this cpu.
+	 * Only the first needs to.
+	 */
+
+	if (preempt_count() == 1)
+		per_cpu(rb_need_resched, cpu) = resched;
+
+	return event;
+
+ out:
+	if (resched)
+		preempt_enable_no_resched_notrace();	/* resched was already pending: do not recurse */
+	else
+		preempt_enable_notrace();
+	return NULL;
+}
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+		      struct ring_buffer_event *event)
+{
+	cpu_buffer->entries++;	/* counted whether or not we own the commit */
+
+	/* Only process further if we own the commit */
+	if (!rb_is_commit(cpu_buffer, event))
+		return;
+
+	cpu_buffer->write_stamp += event->time_delta;	/* advance the time base by this event's delta */
+
+	rb_set_commit_to_write(cpu_buffer);
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved event
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve (unused here).
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve. Always returns 0.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu = raw_smp_processor_id();
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	rb_commit(cpu_buffer, event);
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1) {
+		if (per_cpu(rb_need_resched, cpu))	/* value saved by ring_buffer_lock_reserve() */
+			preempt_enable_no_resched_notrace();
+		else
+			preempt_enable_notrace();
+	} else
+		preempt_enable_no_resched_notrace();
+
+	return 0;
+}
+
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ * Returns 0 on success, -EBUSY if the data could not be written.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+			unsigned long length,
+			void *data)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned long event_length;
+	void *body;
+	int ret = -EBUSY;
+	int cpu, resched;
+
+	if (atomic_read(&buffer->record_disabled))
+		return -EBUSY;
+
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	cpu = raw_smp_processor_id();
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto out;
+
+	event_length = rb_calculate_event_length(length);
+	/* an event never spans pages, so an oversized one can never fit */
+	if (event_length > BUF_PAGE_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer,
+				      RINGBUF_TYPE_DATA, event_length);
+	if (!event)
+		goto out;
+
+	body = rb_event_data(event);
+	memcpy(body, data, length);
+	rb_commit(cpu_buffer, event);
+	ret = 0;
+ out:
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
+	return ret;
+}
+
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *reader = cpu_buffer->reader_page;
+	struct buffer_page *head = cpu_buffer->head_page;
+	struct buffer_page *commit = cpu_buffer->commit_page;
+
+	return reader->read == rb_page_commit(reader) &&
+		(commit == reader ||
+		 (commit == head &&
+		  head->read == rb_page_commit(commit)));
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+	atomic_inc(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truly enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+	atomic_dec(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_inc(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truly enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->overrun;
+}
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long entries = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		entries += cpu_buffer->entries;
+	}
+
+	return entries;
+}
+
+/**
+ * ring_buffer_overruns - get the number of overruns in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long overruns = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		overruns += cpu_buffer->overrun;
+	}
+
+	return overruns;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/* Iterator usage is expected to have record disabled */
+	if (list_empty(&cpu_buffer->reader_page->list)) {
+		iter->head_page = cpu_buffer->head_page;
+		iter->head = cpu_buffer->head_page->read;
+	} else {
+		iter->head_page = cpu_buffer->reader_page;
+		iter->head = cpu_buffer->reader_page->read;
+	}
+	if (iter->head)
+		iter->read_stamp = cpu_buffer->read_stamp;
+	else
+		iter->read_stamp = iter->head_page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	cpu_buffer = iter->cpu_buffer;
+
+	return iter->head_page == cpu_buffer->commit_page &&
+		iter->head == rb_commit_index(cpu_buffer);
+}
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		     struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		cpu_buffer->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		cpu_buffer->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+			  struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		iter->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		iter->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *reader = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+	reader = cpu_buffer->reader_page;
+
+	/* If there's more to read, return this page */
+	if (cpu_buffer->reader_page->read < rb_page_size(reader))
+		goto out;
+
+	/* Never should we have an index greater than the size */
+	RB_WARN_ON(cpu_buffer,
+		   cpu_buffer->reader_page->read > rb_page_size(reader));
+
+	/* check if we caught up to the tail */
+	reader = NULL;
+	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
+		goto out;
+
+	/*
+	 * Splice the empty reader page into the list around the head.
+	 * Reset the reader page to size zero.
+	 */
+
+	reader = cpu_buffer->head_page;
+	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.prev = reader->list.prev;
+
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
+
+	/* Make the reader page now replace the head */
+	reader->list.prev->next = &cpu_buffer->reader_page->list;
+	reader->list.next->prev = &cpu_buffer->reader_page->list;
+
+	/*
+	 * If the tail is on the reader, then we must set the head
+	 * to the inserted page, otherwise we set it one before.
+	 */
+	cpu_buffer->head_page = cpu_buffer->reader_page;
+
+	if (cpu_buffer->commit_page != reader)
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+	/* Finally update the reader page to the new head */
+	cpu_buffer->reader_page = reader;
+	rb_reset_reader_page(cpu_buffer);
+
+	goto again;
+
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+	unsigned length;
+
+	reader = rb_get_reader_page(cpu_buffer);
+
+	/* This function should not be called when buffer is empty */
+	BUG_ON(!reader);
+
+	event = rb_reader_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	length = rb_event_length(event);
+	cpu_buffer->reader_page->read += length;
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned length;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+	/*
+	 * Check if we are at the end of the buffer.
+	 */
+	if (iter->head >= rb_page_size(iter->head_page)) {
+		BUG_ON(iter->head_page == cpu_buffer->commit_page);
+		rb_inc_iter(iter);
+		return;
+	}
+
+	event = rb_iter_head_event(iter);
+
+	length = rb_event_length(event);
+
+	/*
+	 * This should not be called to advance the header if we are
+	 * at the tail of the buffer.
+	 */
+	BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
+	       (iter->head + length > rb_commit_index(cpu_buffer)));
+
+	rb_update_iter_read_stamp(iter, event);
+
+	iter->head += length;
+
+	/* check for end of page padding */
+	if ((iter->head >= rb_page_size(iter->head_page)) &&
+	    (iter->head_page != cpu_buffer->commit_page))
+		rb_advance_iter(iter);
+}
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peek at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+ again:
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
+		return NULL;
+
+	event = rb_reader_event(cpu_buffer);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		RB_WARN_ON(cpu_buffer, 1);
+		rb_advance_reader(cpu_buffer);
+		return NULL;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_reader(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_reader(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = cpu_buffer->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+ again:
+	if (rb_per_cpu_empty(cpu_buffer))
+		return NULL;
+
+	event = rb_iter_head_event(iter);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		rb_inc_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = iter->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning, that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	event = ring_buffer_peek(buffer, cpu, ts);
+	if (!event)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+	rb_advance_reader(cpu_buffer);
+
+	return event;
+}
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_read_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_iter *iter;
+	unsigned long flags;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	iter->cpu_buffer = cpu_buffer;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	ring_buffer_iter_reset(iter);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return iter;
+}
+
+/**
+ * ring_buffer_read_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_read_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	atomic_dec(&cpu_buffer->record_disabled);
+	kfree(iter);
+}
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_iter_peek(iter, ts);
+	if (!event)
+		return NULL;
+
+	rb_advance_iter(iter);
+
+	return event;
+}
+
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+	return BUF_PAGE_SIZE * buffer->pages;
+}
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->commit, 0);
+
+	cpu_buffer->head_page->read = 0;
+
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->commit_page = cpu_buffer->head_page;
+
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
+	cpu_buffer->reader_page->read = 0;
+
+	cpu_buffer->overrun = 0;
+	cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset (ignored if not in @buffer's cpumask)
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	/* Index buffers[] only after @cpu has been validated. */
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	rb_reset_cpu(cpu_buffer);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+}
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+	int cpu;
+
+	for_each_buffer_cpu(buffer, cpu)
+		ring_buffer_reset_cpu(buffer, cpu);
+}
+
+/**
+ * ring_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* yes this is racy, but if you don't like the race, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		if (!rb_per_cpu_empty(cpu_buffer))
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 1;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return rb_per_cpu_empty(cpu_buffer);
+}
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and have another backup buffer lying around.
+ * It is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer_a;
+	struct ring_buffer_per_cpu *cpu_buffer_b;
+
+	if (!cpu_isset(cpu, buffer_a->cpumask) ||
+	    !cpu_isset(cpu, buffer_b->cpumask))
+		return -EINVAL;
+
+	/* At least make sure the two buffers are somewhat the same */
+	if (buffer_a->size != buffer_b->size ||
+	    buffer_a->pages != buffer_b->pages)
+		return -EINVAL;
+
+	cpu_buffer_a = buffer_a->buffers[cpu];
+	cpu_buffer_b = buffer_b->buffers[cpu];
+
+	/*
+	 * We can't do a synchronize_sched here because this
+	 * function can be called in atomic context.
+	 * Normally this will be called from the same CPU as cpu.
+	 * If not it's up to the caller to protect this.
+	 */
+	atomic_inc(&cpu_buffer_a->record_disabled);
+	atomic_inc(&cpu_buffer_b->record_disabled);
+
+	buffer_a->buffers[cpu] = cpu_buffer_b;
+	buffer_b->buffers[cpu] = cpu_buffer_a;
+
+	cpu_buffer_b->buffer = buffer_a;
+	cpu_buffer_a->buffer = buffer_b;
+
+	atomic_dec(&cpu_buffer_a->record_disabled);
+	atomic_dec(&cpu_buffer_b->record_disabled);
+
+	return 0;
+}
+
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c..d345d649d07 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
 #include <linux/utsrelease.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/notifier.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -22,6 +23,7 @@
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -31,25 +33,36 @@
 #include <linux/writeback.h>
 
 #include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
 
 #include "trace.h"
 
+#define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
+
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
-static unsigned long __read_mostly	tracing_nr_buffers;
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+	preempt_disable();
+	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+	preempt_enable();
+}
+
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu_mask(cpu, tracing_buffer_mask)
 
-static int trace_alloc_page(void);
-static int trace_free_page(void);
-
 static int tracing_disabled = 1;
 
-static unsigned long tracing_pages_allocated;
-
 long
 ns2usecs(cycle_t nsec)
 {
@@ -60,7 +73,9 @@ ns2usecs(cycle_t nsec)
 
 cycle_t ftrace_now(int cpu)
 {
-	return cpu_clock(cpu);
+	u64 ts = ring_buffer_time_stamp(cpu);
+	ring_buffer_normalize_time_stamp(cpu, &ts);
+	return ts;
 }
 
 /*
@@ -100,11 +115,18 @@ static int			tracer_enabled = 1;
 int				ftrace_function_enabled;
 
 /*
- * trace_nr_entries is the number of entries that is allocated
- * for a buffer. Note, the number of entries is always rounded
- * to ENTRIES_PER_PAGE.
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
+ *
+ * This size is purposely set low, to hold only 16384 entries.
+ * If the dump on oops happens, it will be much appreciated
+ * to not have to wait for all that output. Anyway this can be
+ * boot time and run time configurable.
  */
-static unsigned long		trace_nr_entries = 65536UL;
+#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
+
+static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer		*trace_types __read_mostly;
@@ -133,24 +155,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds iter_ctrl options */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
 
-static notrace void no_trace_init(struct trace_array *tr)
-{
-	int cpu;
-
-	ftrace_function_enabled = 0;
-	if(tr->ctrl)
-		for_each_online_cpu(cpu)
-			tracing_reset(tr->data[cpu]);
-	tracer_enabled = 0;
-}
-
-/* dummy trace to disable tracing */
-static struct tracer no_tracer __read_mostly = {
-	.name		= "none",
-	.init		= no_trace_init
-};
-
-
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
@@ -167,23 +171,21 @@ void trace_wake_up(void)
 		wake_up(&trace_wait);
 }
 
-#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
-
-static int __init set_nr_entries(char *str)
+static int __init set_buf_size(char *str)
 {
-	unsigned long nr_entries;
+	unsigned long buf_size;
 	int ret;
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &nr_entries);
+	ret = strict_strtoul(str, 0, &buf_size);
 	/* nr_entries can not be zero */
-	if (ret < 0 || nr_entries == 0)
+	if (ret < 0 || buf_size == 0)
 		return 0;
-	trace_nr_entries = nr_entries;
+	trace_buf_size = buf_size;
 	return 1;
 }
-__setup("trace_entries=", set_nr_entries);
+__setup("trace_buf_size=", set_buf_size);
 
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
@@ -191,21 +193,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
 }
 
 /*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- *  IRQS_OFF	- interrupts were disabled
- *  NEED_RESCED - reschedule is requested
- *  HARDIRQ	- inside an interrupt handler
- *  SOFTIRQ	- inside a softirq handler
- */
-enum trace_flag_type {
-	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_NEED_RESCHED		= 0x02,
-	TRACE_FLAG_HARDIRQ		= 0x04,
-	TRACE_FLAG_SOFTIRQ		= 0x08,
-};
-
-/*
  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
  * control the output of kernel symbols.
  */
@@ -224,6 +211,7 @@ static const char *trace_options[] = {
 	"block",
 	"stacktrace",
 	"sched-tree",
+	"ftrace_printk",
 	NULL
 };
 
@@ -266,54 +254,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tracing_record_cmdline(current);
 }
 
-#define CHECK_COND(cond)			\
-	if (unlikely(cond)) {			\
-		tracing_disabled = 1;		\
-		WARN_ON(1);			\
-		return -1;			\
-	}
-
-/**
- * check_pages - integrity check of trace buffers
- *
- * As a safty measure we check to make sure the data pages have not
- * been corrupted.
- */
-int check_pages(struct trace_array_cpu *data)
-{
-	struct page *page, *tmp;
-
-	CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
-	CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
-
-	list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
-		CHECK_COND(page->lru.next->prev != &page->lru);
-		CHECK_COND(page->lru.prev->next != &page->lru);
-	}
-
-	return 0;
-}
-
-/**
- * head_page - page address of the first page in per_cpu buffer.
- *
- * head_page returns the page address of the first page in
- * a per_cpu buffer. This also preforms various consistency
- * checks to make sure the buffer has not been corrupted.
- */
-void *head_page(struct trace_array_cpu *data)
-{
-	struct page *page;
-
-	if (list_empty(&data->trace_pages))
-		return NULL;
-
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	BUG_ON(&page->lru == &data->trace_pages);
-
-	return page_address(page);
-}
-
 /**
  * trace_seq_printf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -395,28 +335,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
 	return len;
 }
 
-#define HEX_CHARS 17
-static const char hex2asc[] = "0123456789abcdef";
+#define MAX_MEMHEX_BYTES	8
+#define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 {
 	unsigned char hex[HEX_CHARS];
 	unsigned char *data = mem;
-	unsigned char byte;
 	int i, j;
 
-	BUG_ON(len >= HEX_CHARS);
-
 #ifdef __BIG_ENDIAN
 	for (i = 0, j = 0; i < len; i++) {
 #else
 	for (i = len-1, j = 0; i >= 0; i--) {
 #endif
-		byte = data[i];
-
-		hex[j++] = hex2asc[byte & 0x0f];
-		hex[j++] = hex2asc[byte >> 4];
+		hex[j++] = hex_asc_hi(data[i]);
+		hex[j++] = hex_asc_lo(data[i]);
 	}
 	hex[j++] = ' ';
 
@@ -460,34 +395,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
 	trace_seq_reset(s);
 }
 
-/*
- * flip the trace buffers between two trace descriptors.
- * This usually is the buffers between the global_trace and
- * the max_tr to record a snapshot of a current trace.
- *
- * The ftrace_max_lock must be held.
- */
-static void
-flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
-{
-	struct list_head flip_pages;
-
-	INIT_LIST_HEAD(&flip_pages);
-
-	memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
-		sizeof(struct trace_array_cpu) -
-		offsetof(struct trace_array_cpu, trace_head_idx));
-
-	check_pages(tr1);
-	check_pages(tr2);
-	list_splice_init(&tr1->trace_pages, &flip_pages);
-	list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
-	list_splice_init(&flip_pages, &tr2->trace_pages);
-	BUG_ON(!list_empty(&flip_pages));
-	check_pages(tr1);
-	check_pages(tr2);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -500,17 +407,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data;
-	int i;
+	struct ring_buffer *buf = tr->buffer;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	/* clear out all the previous traces */
-	for_each_tracing_cpu(i) {
-		data = tr->data[i];
-		flip_trace(max_tr.data[i], data);
-		tracing_reset(data);
-	}
+
+	tr->buffer = max_tr.buffer;
+	max_tr.buffer = buf;
+
+	ftrace_disable_cpu();
+	ring_buffer_reset(tr->buffer);
+	ftrace_enable_cpu();
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +434,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data = tr->data[cpu];
-	int i;
+	int ret;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_tracing_cpu(i)
-		tracing_reset(max_tr.data[i]);
 
-	flip_trace(max_tr.data[cpu], data);
-	tracing_reset(data);
+	ftrace_disable_cpu();
+
+	ring_buffer_reset(max_tr.buffer);
+	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+	ftrace_enable_cpu();
+
+	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +483,6 @@ int register_tracer(struct tracer *type)
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 	if (type->selftest) {
 		struct tracer *saved_tracer = current_trace;
-		struct trace_array_cpu *data;
 		struct trace_array *tr = &global_trace;
 		int saved_ctrl = tr->ctrl;
 		int i;
@@ -585,10 +494,7 @@ int register_tracer(struct tracer *type)
 		 * If we fail, we do not register this tracer.
 		 */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		current_trace = type;
 		tr->ctrl = 0;
@@ -604,10 +510,7 @@ int register_tracer(struct tracer *type)
 		}
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -653,13 +556,11 @@ void unregister_tracer(struct tracer *type)
 	mutex_unlock(&trace_types_lock);
 }
 
-void tracing_reset(struct trace_array_cpu *data)
+void tracing_reset(struct trace_array *tr, int cpu)
 {
-	data->trace_idx = 0;
-	data->overrun = 0;
-	data->trace_head = data->trace_tail = head_page(data);
-	data->trace_head_idx = 0;
-	data->trace_tail_idx = 0;
+	ftrace_disable_cpu();
+	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -745,82 +646,16 @@ void tracing_record_cmdline(struct task_struct *tsk)
 	trace_save_cmdline(tsk);
 }
 
-static inline struct list_head *
-trace_next_list(struct trace_array_cpu *data, struct list_head *next)
-{
-	/*
-	 * Roundrobin - but skip the head (which is not a real page):
-	 */
-	next = next->next;
-	if (unlikely(next == &data->trace_pages))
-		next = next->next;
-	BUG_ON(next == &data->trace_pages);
-
-	return next;
-}
-
-static inline void *
-trace_next_page(struct trace_array_cpu *data, void *addr)
-{
-	struct list_head *next;
-	struct page *page;
-
-	page = virt_to_page(addr);
-
-	next = trace_next_list(data, &page->lru);
-	page = list_entry(next, struct page, lru);
-
-	return page_address(page);
-}
-
-static inline struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
-{
-	unsigned long idx, idx_next;
-	struct trace_entry *entry;
-
-	data->trace_idx++;
-	idx = data->trace_head_idx;
-	idx_next = idx + 1;
-
-	BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
-
-	entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
-
-	if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
-		data->trace_head = trace_next_page(data, data->trace_head);
-		idx_next = 0;
-	}
-
-	if (data->trace_head == data->trace_tail &&
-	    idx_next == data->trace_tail_idx) {
-		/* overrun */
-		data->overrun++;
-		data->trace_tail_idx++;
-		if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-			data->trace_tail =
-				trace_next_page(data, data->trace_tail);
-			data->trace_tail_idx = 0;
-		}
-	}
-
-	data->trace_head_idx = idx_next;
-
-	return entry;
-}
-
-static inline void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+void
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+			     int pc)
 {
 	struct task_struct *tsk = current;
-	unsigned long pc;
-
-	pc = preempt_count();
 
-	entry->preempt_count	= pc & 0xff;
-	entry->pid		= (tsk) ? tsk->pid : 0;
-	entry->t		= ftrace_now(raw_smp_processor_id());
-	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+	entry->preempt_count		= pc & 0xff;
+	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->flags =
+		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +663,139 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
-	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+	       int pc)
 {
-	struct trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_FN;
-	entry->fn.ip		= ip;
-	entry->fn.parent_ip	= parent_ip;
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	/* If we are reading the ring buffer, don't trace */
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type			= TRACE_FN;
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+       int pc)
 {
 	if (likely(!atomic_read(&data->disabled)))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-#ifdef CONFIG_MMIOTRACE
-void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_rw *rw)
+static void ftrace_trace_stack(struct trace_array *tr,
+			       struct trace_array_cpu *data,
+			       unsigned long flags,
+			       int skip, int pc)
 {
-	struct trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct stack_entry *entry;
+	struct stack_trace trace;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_RW;
-	entry->mmiorw		= *rw;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
-
-	trace_wake_up();
-}
-
-void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_map *map)
-{
-	struct trace_entry *entry;
-	unsigned long irq_flags;
+	if (!(trace_flags & TRACE_ITER_STACKTRACE))
+		return;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type		= TRACE_STACK;
 
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_MAP;
-	entry->mmiomap		= *map;
+	memset(&entry->caller, 0, sizeof(entry->caller));
 
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	trace.nr_entries	= 0;
+	trace.max_entries	= FTRACE_STACK_ENTRIES;
+	trace.skip		= skip;
+	trace.entries		= entry->caller;
 
-	trace_wake_up();
+	save_stack_trace(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
-#endif
 
 void __trace_stack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long flags,
 		   int skip)
 {
-	struct trace_entry *entry;
-	struct stack_trace trace;
-
-	if (!(trace_flags & TRACE_ITER_STACKTRACE))
-		return;
-
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_STACK;
-
-	memset(&entry->stack, 0, sizeof(entry->stack));
-
-	trace.nr_entries	= 0;
-	trace.max_entries	= FTRACE_STACK_ENTRIES;
-	trace.skip		= skip;
-	trace.entries		= entry->stack.caller;
-
-	save_stack_trace(&trace);
+	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
-void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+static void
+ftrace_trace_special(void *__tr, void *__data,
+		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
+		     int pc)
 {
+	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
 	struct trace_array *tr = __tr;
-	struct trace_entry *entry;
+	struct special_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_SPECIAL;
-	entry->special.arg1	= arg1;
-	entry->special.arg2	= arg2;
-	entry->special.arg3	= arg3;
-	__trace_stack(tr, data, irq_flags, 4);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, pc);
+	entry->ent.type			= TRACE_SPECIAL;
+	entry->arg1			= arg1;
+	entry->arg2			= arg2;
+	entry->arg3			= arg3;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
 
 	trace_wake_up();
 }
 
 void
+__trace_special(void *__tr, void *__data,
+		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+	ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
+void
 tracing_sched_switch_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *prev,
 			   struct task_struct *next,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
-	struct trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_CTX;
-	entry->ctx.prev_pid	= prev->pid;
-	entry->ctx.prev_prio	= prev->prio;
-	entry->ctx.prev_state	= prev->state;
-	entry->ctx.next_pid	= next->pid;
-	entry->ctx.next_prio	= next->prio;
-	entry->ctx.next_state	= next->state;
-	__trace_stack(tr, data, flags, 5);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type			= TRACE_CTX;
+	entry->prev_pid			= prev->pid;
+	entry->prev_prio		= prev->prio;
+	entry->prev_state		= prev->state;
+	entry->next_pid			= next->pid;
+	entry->next_prio		= next->prio;
+	entry->next_state		= next->state;
+	entry->next_cpu	= task_cpu(next);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+	ftrace_trace_stack(tr, data, flags, 5, pc);
 }
 
 void
@@ -974,25 +803,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *wakee,
 			   struct task_struct *curr,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
-	struct trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_WAKE;
-	entry->ctx.prev_pid	= curr->pid;
-	entry->ctx.prev_prio	= curr->prio;
-	entry->ctx.prev_state	= curr->state;
-	entry->ctx.next_pid	= wakee->pid;
-	entry->ctx.next_prio	= wakee->prio;
-	entry->ctx.next_state	= wakee->state;
-	__trace_stack(tr, data, flags, 6);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type			= TRACE_WAKE;
+	entry->prev_pid			= curr->pid;
+	entry->prev_prio		= curr->prio;
+	entry->prev_state		= curr->state;
+	entry->next_pid			= wakee->pid;
+	entry->next_prio		= wakee->prio;
+	entry->next_state		= wakee->state;
+	entry->next_cpu			= task_cpu(wakee);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+	ftrace_trace_stack(tr, data, flags, 6, pc);
 
 	trace_wake_up();
 }
@@ -1002,23 +834,21 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	unsigned long flags;
-	long disabled;
 	int cpu;
+	int pc;
 
-	if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+	if (tracing_disabled || !tr->ctrl)
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
-		__trace_special(tr, data, arg1, arg2, arg3);
+	if (likely(!atomic_read(&data->disabled)))
+		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 }
 
 #ifdef CONFIG_FTRACE
@@ -1029,7 +859,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
-	int cpu;
+	int cpu, resched;
+	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
@@ -1037,16 +868,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	if (skip_trace(ip))
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	resched = need_resched();
+	preempt_disable_notrace();
+	local_save_flags(flags);
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,111 +910,96 @@ enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 };
 
-static struct trace_entry *
-trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
-		struct trace_iterator *iter, int cpu)
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
-	struct page *page;
-	struct trace_entry *array;
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
 
-	if (iter->next_idx[cpu] >= tr->entries ||
-	    iter->next_idx[cpu] >= data->trace_idx ||
-	    (data->trace_head == data->trace_tail &&
-	     data->trace_head_idx == data->trace_tail_idx))
-		return NULL;
+	iter->idx++;
+	if (iter->buffer_iter[iter->cpu])
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 
-	if (!iter->next_page[cpu]) {
-		/* Initialize the iterator for this cpu trace buffer */
-		WARN_ON(!data->trace_tail);
-		page = virt_to_page(data->trace_tail);
-		iter->next_page[cpu] = &page->lru;
-		iter->next_page_idx[cpu] = data->trace_tail_idx;
-	}
+	ftrace_enable_cpu();
+}
+
+static struct trace_entry *
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+{
+	struct ring_buffer_event *event;
+	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	page = list_entry(iter->next_page[cpu], struct page, lru);
-	BUG_ON(&data->trace_pages == &page->lru);
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
+	if (buf_iter)
+		event = ring_buffer_iter_peek(buf_iter, ts);
+	else
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
 
-	array = page_address(page);
+	ftrace_enable_cpu();
 
-	WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
-	return &array[iter->next_page_idx[cpu]];
+	return event ? ring_buffer_event_data(event) : NULL;
 }
 
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	struct trace_array *tr = iter->tr;
+	struct ring_buffer *buffer = iter->tr->buffer;
 	struct trace_entry *ent, *next = NULL;
+	u64 next_ts = 0, ts;
 	int next_cpu = -1;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
+
+		if (ring_buffer_empty_cpu(buffer, cpu))
 			continue;
-		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+		ent = peek_next_entry(iter, cpu, &ts);
+
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
-		if (ent && (!next || ent->t < next->t)) {
+		if (ent && (!next || ts < next_ts)) {
 			next = ent;
 			next_cpu = cpu;
+			next_ts = ts;
 		}
 	}
 
 	if (ent_cpu)
 		*ent_cpu = next_cpu;
 
+	if (ent_ts)
+		*ent_ts = next_ts;
+
 	return next;
 }
 
-static void trace_iterator_increment(struct trace_iterator *iter)
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	iter->idx++;
-	iter->next_idx[iter->cpu]++;
-	iter->next_page_idx[iter->cpu]++;
-
-	if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
-		struct trace_array_cpu *data = iter->tr->data[iter->cpu];
-
-		iter->next_page_idx[iter->cpu] = 0;
-		iter->next_page[iter->cpu] =
-			trace_next_list(data, iter->next_page[iter->cpu]);
-	}
+	return __find_next_entry(iter, ent_cpu, ent_ts);
 }
 
-static void trace_consume(struct trace_iterator *iter)
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+	iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
 
-	data->trace_tail_idx++;
-	if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-		data->trace_tail = trace_next_page(data, data->trace_tail);
-		data->trace_tail_idx = 0;
-	}
+	if (iter->ent)
+		trace_iterator_increment(iter, iter->cpu);
 
-	/* Check if we empty it, then reset the index */
-	if (data->trace_head == data->trace_tail &&
-	    data->trace_head_idx == data->trace_tail_idx)
-		data->trace_idx = 0;
+	return iter->ent ? iter : NULL;
 }
 
-static void *find_next_entry_inc(struct trace_iterator *iter)
+static void trace_consume(struct trace_iterator *iter)
 {
-	struct trace_entry *next;
-	int next_cpu = -1;
-
-	next = find_next_entry(iter, &next_cpu);
-
-	iter->prev_ent = iter->ent;
-	iter->prev_cpu = iter->cpu;
-
-	iter->ent = next;
-	iter->cpu = next_cpu;
-
-	if (next)
-		trace_iterator_increment(iter);
-
-	return next ? iter : NULL;
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1032,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	struct trace_iterator *iter = m->private;
 	void *p = NULL;
 	loff_t l = 0;
-	int i;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
 
@@ -1229,14 +1051,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->ent = NULL;
 		iter->cpu = 0;
 		iter->idx = -1;
-		iter->prev_ent = NULL;
-		iter->prev_cpu = -1;
 
-		for_each_tracing_cpu(i) {
-			iter->next_idx[i] = 0;
-			iter->next_page[i] = NULL;
+		ftrace_disable_cpu();
+
+		for_each_tracing_cpu(cpu) {
+			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
+		ftrace_enable_cpu();
+
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -1330,21 +1153,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 
 static void print_lat_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#                _------=> CPU#            \n");
-	seq_puts(m, "#               / _-----=> irqs-off        \n");
-	seq_puts(m, "#              | / _----=> need-resched    \n");
-	seq_puts(m, "#              || / _---=> hardirq/softirq \n");
-	seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
-	seq_puts(m, "#              |||| /                      \n");
-	seq_puts(m, "#              |||||     delay             \n");
-	seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
-	seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
+	seq_puts(m, "#                  _------=> CPU#            \n");
+	seq_puts(m, "#                 / _-----=> irqs-off        \n");
+	seq_puts(m, "#                | / _----=> need-resched    \n");
+	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
+	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
+	seq_puts(m, "#                |||| /                      \n");
+	seq_puts(m, "#                |||||     delay             \n");
+	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
+	seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
 }
 
 static void print_func_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
-	seq_puts(m, "#              | |      |          |         |\n");
+	seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
+	seq_puts(m, "#              | |       |          |         |\n");
 }
 
 
@@ -1355,23 +1178,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[tr->cpu];
 	struct tracer *type = current_trace;
-	unsigned long total   = 0;
-	unsigned long entries = 0;
-	int cpu;
+	unsigned long total;
+	unsigned long entries;
 	const char *name = "preemption";
 
 	if (type)
 		name = type->name;
 
-	for_each_tracing_cpu(cpu) {
-		if (head_page(tr->data[cpu])) {
-			total += tr->data[cpu]->trace_idx;
-			if (tr->data[cpu]->trace_idx > tr->entries)
-				entries += tr->entries;
-			else
-				entries += tr->data[cpu]->trace_idx;
-		}
-	}
+	entries = ring_buffer_entries(iter->tr->buffer);
+	total = entries +
+		ring_buffer_overruns(iter->tr->buffer);
 
 	seq_printf(m, "%s latency trace v1.1.5 on %s\n",
 		   name, UTS_RELEASE);
@@ -1428,7 +1244,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 	comm = trace_find_cmdline(entry->pid);
 
 	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
-	trace_seq_printf(s, "%d", cpu);
+	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
 			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
 			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1457,7 +1273,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 unsigned long preempt_mark_thresh = 100;
 
 static void
-lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
 		    unsigned long rel_usecs)
 {
 	trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1287,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static int
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+	struct trace_entry *ent;
+	struct trace_field_cont *cont;
+	bool ok = true;
+
+	ent = peek_next_entry(iter, iter->cpu, NULL);
+	if (!ent || ent->type != TRACE_CONT) {
+		trace_seq_putc(s, '\n');
+		return;
+	}
+
+	do {
+		cont = (struct trace_field_cont *)ent;
+		if (ok)
+			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
+
+		ftrace_disable_cpu();
+
+		if (iter->buffer_iter[iter->cpu])
+			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		else
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+		ftrace_enable_cpu();
+
+		ent = peek_next_entry(iter, iter->cpu, NULL);
+	} while (ent && ent->type == TRACE_CONT);
+
+	if (!ok)
+		trace_seq_putc(s, '\n');
+}
+
+static enum print_line_t
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-	struct trace_entry *next_entry = find_next_entry(iter, NULL);
+	struct trace_entry *next_entry;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
+	u64 next_ts;
 	char *comm;
 	int S, T;
 	int i;
 	unsigned state;
 
+	if (entry->type == TRACE_CONT)
+		return TRACE_TYPE_HANDLED;
+
+	next_entry = find_next_entry(iter, NULL, &next_ts);
 	if (!next_entry)
-		next_entry = entry;
-	rel_usecs = ns2usecs(next_entry->t - entry->t);
-	abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+		next_ts = iter->ts;
+	rel_usecs = ns2usecs(next_ts - iter->ts);
+	abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
 	if (verbose) {
 		comm = trace_find_cmdline(entry->pid);
-		trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+		trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
 				 entry->pid, cpu, entry->flags,
 				 entry->preempt_count, trace_idx,
-				 ns2usecs(entry->t),
+				 ns2usecs(iter->ts),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
 				 rel_usecs % 1000);
@@ -1507,52 +1365,85 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		lat_print_timestamp(s, abs_usecs, rel_usecs);
 	}
 	switch (entry->type) {
-	case TRACE_FN:
-		seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(entry->fn.parent_ip))
+		if (kretprobed(field->parent_ip))
 			trace_seq_puts(s, KRETPROBE_MSG);
 		else
-			seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+			seq_print_ip_sym(s, field->parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
+
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 
-		state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
+		state = field->prev_state ?
+			__ffs(field->prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-		comm = trace_find_cmdline(entry->ctx.next_pid);
-		trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
-				 entry->ctx.prev_pid,
-				 entry->ctx.prev_prio,
+		comm = trace_find_cmdline(field->next_pid);
+		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+				 field->prev_pid,
+				 field->prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 entry->ctx.next_pid,
-				 entry->ctx.next_prio,
+				 field->next_cpu,
+				 field->next_pid,
+				 field->next_prio,
 				 T, comm);
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
+
 		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
 				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+			seq_print_ip_sym(s, field->caller[i], sym_flags);
 		}
 		trace_seq_puts(s, "\n");
 		break;
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_fmt(struct trace_iterator *iter)
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1458,126 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	entry = iter->ent;
 
+	if (entry->type == TRACE_CONT)
+		return TRACE_TYPE_HANDLED;
+
 	comm = trace_find_cmdline(iter->ent->pid);
 
-	t = ns2usecs(entry->t);
+	t = ns2usecs(iter->ts);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
 	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 	if (!ret)
-		return 0;
-	ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+		return TRACE_TYPE_PARTIAL_LINE;
+	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 	ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
-	case TRACE_FN:
-		ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
+
+		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						entry->fn.parent_ip) {
+						field->parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
-				return 0;
-			if (kretprobed(entry->fn.parent_ip))
+				return TRACE_TYPE_PARTIAL_LINE;
+			if (kretprobed(field->parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
-				ret = seq_print_ip_sym(s, entry->fn.parent_ip,
+				ret = seq_print_ip_sym(s,
+						       field->parent_ip,
 						       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_printf(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
-		ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
+		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i) {
 				ret = trace_seq_puts(s, " <= ");
 				if (!ret)
-					return 0;
+					return TRACE_TYPE_PARTIAL_LINE;
 			}
-			ret = seq_print_ip_sym(s, entry->stack.caller[i],
+			ret = seq_print_ip_sym(s, field->caller[i],
 					       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_puts(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
-	return 1;
+	case TRACE_PRINT: {
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
+	}
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_raw_fmt(struct trace_iterator *iter)
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
@@ -1659,47 +1586,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
 
 	entry = iter->ent;
 
+	if (entry->type == TRACE_CONT)
+		return TRACE_TYPE_HANDLED;
+
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		entry->pid, iter->cpu, entry->t);
+		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
-	case TRACE_FN:
+	case TRACE_FN: {
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
+
 		ret = trace_seq_printf(s, "%x %x\n",
-					entry->fn.ip, entry->fn.parent_ip);
+					field->ip,
+					field->parent_ip);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
+	case TRACE_STACK: {
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
-	return 1;
+	case TRACE_PRINT: {
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
+
+		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
+	}
+	return TRACE_TYPE_HANDLED;
 }
 
 #define SEQ_PUT_FIELD_RET(s, x)				\
@@ -1710,11 +1667,12 @@ do {							\
 
 #define SEQ_PUT_HEX_FIELD_RET(s, x)			\
 do {							\
+	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
 	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
 		return 0;				\
 } while (0)
 
-static int print_hex_fmt(struct trace_iterator *iter)
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
@@ -1723,97 +1681,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
 
 	entry = iter->ent;
 
+	if (entry->type == TRACE_CONT)
+		return TRACE_TYPE_HANDLED;
+
 	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-	SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_bin_fmt(struct trace_iterator *iter)
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
 
 	entry = iter->ent;
 
+	if (entry->type == TRACE_CONT)
+		return TRACE_TYPE_HANDLED;
+
 	SEQ_PUT_FIELD_RET(s, entry->pid);
-	SEQ_PUT_FIELD_RET(s, entry->cpu);
-	SEQ_PUT_FIELD_RET(s, entry->t);
+	SEQ_PUT_FIELD_RET(s, iter->cpu);
+	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
+
+		SEQ_PUT_FIELD_RET(s, field->ip);
+		SEQ_PUT_FIELD_RET(s, field->parent_ip);
 		break;
-	case TRACE_CTX:
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+	}
+	case TRACE_CTX: {
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
+
+		SEQ_PUT_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_FIELD_RET(s, field->prev_prio);
+		SEQ_PUT_FIELD_RET(s, field->prev_state);
+		SEQ_PUT_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_FIELD_RET(s, field->next_prio);
+		SEQ_PUT_FIELD_RET(s, field->next_state);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
+
+		SEQ_PUT_FIELD_RET(s, field->arg1);
+		SEQ_PUT_FIELD_RET(s, field->arg2);
+		SEQ_PUT_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	return 1;
 }
 
 static int trace_empty(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (head_page(data) && data->trace_idx &&
-		    (data->trace_tail != data->trace_head ||
-		     data->trace_tail_idx != data->trace_head_idx))
-			return 0;
+		if (iter->buffer_iter[cpu]) {
+			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+				return 0;
+		} else {
+			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+				return 0;
+		}
 	}
+
 	return 1;
 }
 
-static int print_trace_line(struct trace_iterator *iter)
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
-	if (iter->trace && iter->trace->print_line)
-		return iter->trace->print_line(iter);
+	enum print_line_t ret;
+
+	if (iter->trace && iter->trace->print_line) {
+		ret = iter->trace->print_line(iter);
+		if (ret != TRACE_TYPE_UNHANDLED)
+			return ret;
+	}
 
 	if (trace_flags & TRACE_ITER_BIN)
 		return print_bin_fmt(iter);
@@ -1869,6 +1869,8 @@ static struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, int *ret)
 {
 	struct trace_iterator *iter;
+	struct seq_file *m;
+	int cpu;
 
 	if (tracing_disabled) {
 		*ret = -ENODEV;
@@ -1889,28 +1891,45 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->trace = current_trace;
 	iter->pos = -1;
 
+	for_each_tracing_cpu(cpu) {
+
+		iter->buffer_iter[cpu] =
+			ring_buffer_read_start(iter->tr->buffer, cpu);
+
+		if (!iter->buffer_iter[cpu])
+			goto fail_buffer;
+	}
+
 	/* TODO stop tracer */
 	*ret = seq_open(file, &tracer_seq_ops);
-	if (!*ret) {
-		struct seq_file *m = file->private_data;
-		m->private = iter;
+	if (*ret)
+		goto fail_buffer;
 
-		/* stop the trace while dumping */
-		if (iter->tr->ctrl) {
-			tracer_enabled = 0;
-			ftrace_function_enabled = 0;
-		}
+	m = file->private_data;
+	m->private = iter;
 
-		if (iter->trace && iter->trace->open)
-			iter->trace->open(iter);
-	} else {
-		kfree(iter);
-		iter = NULL;
+	/* stop the trace while dumping */
+	if (iter->tr->ctrl) {
+		tracer_enabled = 0;
+		ftrace_function_enabled = 0;
 	}
+
+	if (iter->trace && iter->trace->open)
+			iter->trace->open(iter);
+
 	mutex_unlock(&trace_types_lock);
 
  out:
 	return iter;
+
+ fail_buffer:
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+	mutex_unlock(&trace_types_lock);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,8 +1945,14 @@ int tracing_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct trace_iterator *iter = m->private;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
@@ -2352,9 +2377,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	struct tracer *t;
 	char buf[max_tracer_type_len+1];
 	int i;
+	size_t ret;
 
 	if (cnt > max_tracer_type_len)
 		cnt = max_tracer_type_len;
+	ret = cnt;
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
@@ -2370,7 +2397,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 		if (strcmp(t->name, buf) == 0)
 			break;
 	}
-	if (!t || t == current_trace)
+	if (!t) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (t == current_trace)
 		goto out;
 
 	if (current_trace && current_trace->reset)
@@ -2383,9 +2414,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&trace_types_lock);
 
-	filp->f_pos += cnt;
+	if (ret == cnt)
+		filp->f_pos += cnt;
 
-	return cnt;
+	return ret;
 }
 
 static ssize_t
@@ -2500,20 +2532,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	struct trace_array_cpu *data;
-	static cpumask_t mask;
-	unsigned long flags;
-#ifdef CONFIG_FTRACE
-	int ftrace_save;
-#endif
-	int cpu;
 	ssize_t sret;
 
 	/* return any leftover data */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (sret != -EBUSY)
 		return sret;
-	sret = 0;
 
 	trace_seq_reset(&iter->seq);
 
@@ -2524,6 +2548,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
+waitagain:
+	sret = 0;
 	while (trace_empty(iter)) {
 
 		if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +2614,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
-	/*
-	 * We need to stop all tracing on all CPUS to read the
-	 * the next buffer. This is a bit expensive, but is
-	 * not done often. We fill all what we can read,
-	 * and then release the locks again.
-	 */
-
-	cpus_clear(mask);
-	local_irq_save(flags);
-#ifdef CONFIG_FTRACE
-	ftrace_save = ftrace_enabled;
-	ftrace_enabled = 0;
-#endif
-	smp_wmb();
-	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (!head_page(data) || !data->trace_idx)
-			continue;
-
-		atomic_inc(&data->disabled);
-		cpu_set(cpu, mask);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_lock(&data->lock);
-
-		if (data->overrun > iter->last_overrun[cpu])
-			iter->overrun[cpu] +=
-				data->overrun - iter->last_overrun[cpu];
-		iter->last_overrun[cpu] = data->overrun;
-	}
-
 	while (find_next_entry_inc(iter) != NULL) {
-		int ret;
+		enum print_line_t ret;
 		int len = iter->seq.len;
 
 		ret = print_trace_line(iter);
-		if (!ret) {
+		if (ret == TRACE_TYPE_PARTIAL_LINE) {
 			/* don't print partial lines */
 			iter->seq.len = len;
 			break;
@@ -2639,26 +2631,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			break;
 	}
 
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_unlock(&data->lock);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		atomic_dec(&data->disabled);
-	}
-#ifdef CONFIG_FTRACE
-	ftrace_enabled = ftrace_save;
-#endif
-	local_irq_restore(flags);
-
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (iter->seq.readpos >= iter->seq.len)
 		trace_seq_reset(&iter->seq);
+
+	/*
+	 * If there was nothing to send to user, in spite of consuming trace
+	 * entries, go back to wait for more entries.
+	 */
 	if (sret == -EBUSY)
-		sret = 0;
+		goto waitagain;
 
 out:
 	mutex_unlock(&trace_types_lock);
@@ -2684,7 +2667,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
 	unsigned long val;
 	char buf[64];
-	int i, ret;
+	int ret;
+	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
@@ -2704,59 +2688,38 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
 	mutex_lock(&trace_types_lock);
 
-	if (current_trace != &no_tracer) {
+	if (tr->ctrl) {
 		cnt = -EBUSY;
-		pr_info("ftrace: set current_tracer to none"
+		pr_info("ftrace: please disable tracing"
 			" before modifying buffer size\n");
 		goto out;
 	}
 
-	if (val > global_trace.entries) {
-		long pages_requested;
-		unsigned long freeable_pages;
-
-		/* make sure we have enough memory before mapping */
-		pages_requested =
-			(val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
-
-		/* account for each buffer (and max_tr) */
-		pages_requested *= tracing_nr_buffers * 2;
-
-		/* Check for overflow */
-		if (pages_requested < 0) {
-			cnt = -ENOMEM;
-			goto out;
-		}
-
-		freeable_pages = determine_dirtyable_memory();
-
-		/* we only allow to request 1/4 of useable memory */
-		if (pages_requested >
-		    ((freeable_pages + tracing_pages_allocated) / 4)) {
-			cnt = -ENOMEM;
+	if (val != global_trace.entries) {
+		ret = ring_buffer_resize(global_trace.buffer, val);
+		if (ret < 0) {
+			cnt = ret;
 			goto out;
 		}
 
-		while (global_trace.entries < val) {
-			if (trace_alloc_page()) {
-				cnt = -ENOMEM;
-				goto out;
+		ret = ring_buffer_resize(max_tr.buffer, val);
+		if (ret < 0) {
+			int r;
+			cnt = ret;
+			r = ring_buffer_resize(global_trace.buffer,
+					       global_trace.entries);
+			if (r < 0) {
+				/* AARGH! We are left with different
+				 * size max buffer!!!! */
+				WARN_ON(1);
+				tracing_disabled = 1;
 			}
-			/* double check that we don't go over the known pages */
-			if (tracing_pages_allocated > pages_requested)
-				break;
+			goto out;
 		}
 
-	} else {
-		/* include the number of entries in val (inc of page entries) */
-		while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
-			trace_free_page();
+		global_trace.entries = val;
 	}
 
-	/* check integrity */
-	for_each_tracing_cpu(i)
-		check_pages(global_trace.data[i]);
-
 	filp->f_pos += cnt;
 
 	/* If check pages failed, return ENOMEM */
@@ -2769,6 +2732,52 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int mark_printk(const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+	va_start(args, fmt);
+	ret = trace_vprintk(0, fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+					size_t cnt, loff_t *fpos)
+{
+	char *buf;
+	char *end;
+	struct trace_array *tr = &global_trace;
+
+	if (!tr->ctrl || tracing_disabled)
+		return -EINVAL;
+
+	if (cnt > TRACE_BUF_SIZE)
+		cnt = TRACE_BUF_SIZE;
+
+	buf = kmalloc(cnt + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, ubuf, cnt)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	/* Cut from the first nil or newline. */
+	buf[cnt] = '\0';
+	end = strchr(buf, '\n');
+	if (end)
+		*end = '\0';
+
+	cnt = mark_printk("%s\n", buf);
+	kfree(buf);
+	*fpos += cnt;
+
+	return cnt;
+}
+
 static struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -2800,6 +2809,11 @@ static struct file_operations tracing_entries_fops = {
 	.write		= tracing_entries_write,
 };
 
+static struct file_operations tracing_mark_fops = {
+	.open		= tracing_open_generic,
+	.write		= tracing_mark_write,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static ssize_t
@@ -2846,7 +2860,7 @@ struct dentry *tracing_init_dentry(void)
 #include "trace_selftest.c"
 #endif
 
-static __init void tracer_init_debugfs(void)
+static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
 	struct dentry *entry;
@@ -2881,12 +2895,12 @@ static __init void tracer_init_debugfs(void)
 	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
 				    &global_trace, &show_traces_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'available_tracers' entry\n");
 
 	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
 				    &global_trace, &set_tracer_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'current_tracer' entry\n");
 
 	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
 				    &tracing_max_latency,
@@ -2899,7 +2913,7 @@ static __init void tracer_init_debugfs(void)
 				    &tracing_thresh, &tracing_max_lat_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'tracing_thresh' entry\n");
 	entry = debugfs_create_file("README", 0644, d_tracer,
 				    NULL, &tracing_readme_fops);
 	if (!entry)
@@ -2909,13 +2923,19 @@ static __init void tracer_init_debugfs(void)
 				    NULL, &tracing_pipe_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_pipe' entry\n");
 
 	entry = debugfs_create_file("trace_entries", 0644, d_tracer,
 				    &global_trace, &tracing_entries_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_entries' entry\n");
+
+	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+				    NULL, &tracing_mark_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'trace_marker' entry\n");
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2928,230 +2948,263 @@ static __init void tracer_init_debugfs(void)
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
+	return 0;
 }
 
-static int trace_alloc_page(void)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
+	static DEFINE_SPINLOCK(trace_buf_lock);
+	static char trace_buf[TRACE_BUF_SIZE];
+
+	struct ring_buffer_event *event;
+	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	struct page *page, *tmp;
-	LIST_HEAD(pages);
-	void *array;
-	unsigned pages_allocated = 0;
-	int i;
+	struct print_entry *entry;
+	unsigned long flags, irq_flags;
+	int cpu, len = 0, size, pc;
 
-	/* first allocate a page for each CPU */
-	for_each_tracing_cpu(i) {
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
+	if (!tr->ctrl || tracing_disabled)
+		return 0;
 
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
+	pc = preempt_count();
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
-#endif
-	}
+	if (unlikely(atomic_read(&data->disabled)))
+		goto out;
 
-	/* Now that we successfully allocate a page per CPU, add them */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		ClearPageLRU(page);
+	spin_lock_irqsave(&trace_buf_lock, flags);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		SetPageLRU(page);
-#endif
-	}
-	tracing_pages_allocated += pages_allocated;
-	global_trace.entries += ENTRIES_PER_PAGE;
+	len = min(len, TRACE_BUF_SIZE-1);
+	trace_buf[len] = 0;
 
-	return 0;
+	size = sizeof(*entry) + len + 1;
+	event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+	if (!event)
+		goto out_unlock;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type			= TRACE_PRINT;
+	entry->ip			= ip;
 
- free_pages:
-	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		list_del_init(&page->lru);
-		__free_page(page);
-	}
-	return -ENOMEM;
+	memcpy(&entry->buf, trace_buf, len);
+	entry->buf[len] = 0;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out_unlock:
+	spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+ out:
+	preempt_enable_notrace();
+
+	return len;
 }
+EXPORT_SYMBOL_GPL(trace_vprintk);
 
-static int trace_free_page(void)
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
-	struct trace_array_cpu *data;
-	struct page *page;
-	struct list_head *p;
-	int i;
-	int ret = 0;
+	int ret;
+	va_list ap;
 
-	/* free one page from each buffer */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		tracing_pages_allocated--;
-		tracing_pages_allocated--;
-		__free_page(page);
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
 
-		tracing_reset(data);
+	va_start(ap, fmt);
+	ret = trace_vprintk(ip, fmt, ap);
+	va_end(ap);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		__free_page(page);
+static int trace_panic_handler(struct notifier_block *this,
+			       unsigned long event, void *unused)
+{
+	ftrace_dump();
+	return NOTIFY_OK;
+}
 
-		tracing_reset(data);
-#endif
-	}
-	global_trace.entries -= ENTRIES_PER_PAGE;
+static struct notifier_block trace_panic_notifier = {
+	.notifier_call  = trace_panic_handler,
+	.next           = NULL,
+	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
+};
 
-	return ret;
+static int trace_die_handler(struct notifier_block *self,
+			     unsigned long val,
+			     void *data)
+{
+	switch (val) {
+	case DIE_OOPS:
+		ftrace_dump();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
 }
 
-__init static int tracer_alloc_buffers(void)
+static struct notifier_block trace_die_notifier = {
+	.notifier_call = trace_die_handler,
+	.priority = 200
+};
+
+/*
+ * printk is set to max of 1024, we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT		1000
+
+/*
+ * Define here KERN_TRACE so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE		KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
 {
-	struct trace_array_cpu *data;
-	void *array;
-	struct page *page;
-	int pages = 0;
-	int ret = -ENOMEM;
-	int i;
+	/* Probably should print a warning here. */
+	if (s->len >= 1000)
+		s->len = 1000;
 
-	/* TODO: make the number of buffers hot pluggable with CPUS */
-	tracing_nr_buffers = num_possible_cpus();
-	tracing_buffer_mask = cpu_possible_map;
+	/* should be zero ended, but we are paranoid. */
+	s->buffer[s->len] = 0;
 
-	/* Allocate the first page for all buffers */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-		max_tr.data[i] = &per_cpu(max_data, i);
+	printk(KERN_TRACE "%s", s->buffer);
 
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
+	trace_seq_reset(s);
+}
+
+
+void ftrace_dump(void)
+{
+	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	/* use static because iter can be a bit big for the stack */
+	static struct trace_iterator iter;
+	static cpumask_t mask;
+	static int dump_ran;
+	unsigned long flags;
+	int cnt = 0, cpu;
 
-		/* set the array to the list */
-		INIT_LIST_HEAD(&data->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &data->trace_pages);
-		/* use the LRU flag to differentiate the two buffers */
-		ClearPageLRU(page);
+	/* only one dump */
+	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	if (dump_ran)
+		goto out;
 
-		data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-		max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	dump_ran = 1;
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
+	/* No turning back! */
+	ftrace_kill_atomic();
 
-		INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &max_tr.data[i]->trace_pages);
-		SetPageLRU(page);
-#endif
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&global_trace.data[cpu]->disabled);
 	}
 
+	printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+	iter.tr = &global_trace;
+	iter.trace = current_trace;
+
 	/*
-	 * Since we allocate by orders of pages, we may be able to
-	 * round up a bit.
+	 * We need to stop all tracing on all CPUs to read
+	 * the next buffer. This is a bit expensive, but is
+	 * not done often. We fill in everything we can read,
+	 * and then release the locks again.
 	 */
-	global_trace.entries = ENTRIES_PER_PAGE;
-	pages++;
 
-	while (global_trace.entries < trace_nr_entries) {
-		if (trace_alloc_page())
-			break;
-		pages++;
+	cpus_clear(mask);
+
+	while (!trace_empty(&iter)) {
+
+		if (!cnt)
+			printk(KERN_TRACE "---------------------------------\n");
+
+		cnt++;
+
+		/* reset all but tr, trace, and overruns */
+		memset(&iter.seq, 0,
+		       sizeof(struct trace_iterator) -
+		       offsetof(struct trace_iterator, seq));
+		iter.iter_flags |= TRACE_FILE_LAT_FMT;
+		iter.pos = -1;
+
+		if (find_next_entry_inc(&iter) != NULL) {
+			print_trace_line(&iter);
+			trace_consume(&iter);
+		}
+
+		trace_printk_seq(&iter.seq);
 	}
-	max_tr.entries = global_trace.entries;
 
-	pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
-		pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
-	pr_info("   actual entries %ld\n", global_trace.entries);
+	if (!cnt)
+		printk(KERN_TRACE "   (ftrace buffer empty)\n");
+	else
+		printk(KERN_TRACE "---------------------------------\n");
+
+ out:
+	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+}
+
+__init static int tracer_alloc_buffers(void)
+{
+	struct trace_array_cpu *data;
+	int i;
+
+	/* TODO: make the number of buffers hot pluggable with CPUS */
+	tracing_buffer_mask = cpu_possible_map;
+
+	global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+						   TRACE_BUFFER_FLAGS);
+	if (!global_trace.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+		WARN_ON(1);
+		return 0;
+	}
+	global_trace.entries = ring_buffer_size(global_trace.buffer);
 
-	tracer_init_debugfs();
+#ifdef CONFIG_TRACER_MAX_TRACE
+	max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+					     TRACE_BUFFER_FLAGS);
+	if (!max_tr.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+		WARN_ON(1);
+		ring_buffer_free(global_trace.buffer);
+		return 0;
+	}
+	max_tr.entries = ring_buffer_size(max_tr.buffer);
+	WARN_ON(max_tr.entries != global_trace.entries);
+#endif
+
+	/* Point each CPU's slot at its per-CPU trace data */
+	for_each_tracing_cpu(i) {
+		data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+		max_tr.data[i] = &per_cpu(max_data, i);
+	}
 
 	trace_init_cmdlines();
 
-	register_tracer(&no_tracer);
-	current_trace = &no_tracer;
+	register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+	register_tracer(&boot_tracer);
+	current_trace = &boot_tracer;
+	current_trace->init(&global_trace);
+#else
+	current_trace = &nop_trace;
+#endif
 
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
 
-	return 0;
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &trace_panic_notifier);
 
- free_buffers:
-	for (i-- ; i >= 0; i--) {
-		struct page *page, *tmp;
-		struct trace_array_cpu *data = global_trace.data[i];
+	register_die_notifier(&trace_die_notifier);
 
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-#endif
-	}
-	return ret;
+	return 0;
 }
-fs_initcall(tracer_alloc_buffers);
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2..f1f99572cde 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,9 @@
 #include <asm/atomic.h>
 #include <linux/sched.h>
 #include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
 	TRACE_FN,
 	TRACE_CTX,
 	TRACE_WAKE,
+	TRACE_CONT,
 	TRACE_STACK,
+	TRACE_PRINT,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
+	TRACE_BOOT,
 
 	__TRACE_LAST_TYPE
 };
 
 /*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned char		type;
+	unsigned char		cpu;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+};
+
+/*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
+	struct trace_entry	ent;
 	unsigned long		ip;
 	unsigned long		parent_ip;
 };
+extern struct tracer boot_tracer;
 
 /*
  * Context switch trace entry - which task (and prio) we switched from/to:
  */
 struct ctx_switch_entry {
+	struct trace_entry	ent;
 	unsigned int		prev_pid;
 	unsigned char		prev_prio;
 	unsigned char		prev_state;
 	unsigned int		next_pid;
 	unsigned char		next_prio;
 	unsigned char		next_state;
+	unsigned int		next_cpu;
 };
 
 /*
  * Special (free-form) trace entry:
  */
 struct special_entry {
+	struct trace_entry	ent;
 	unsigned long		arg1;
 	unsigned long		arg2;
 	unsigned long		arg3;
@@ -57,33 +81,60 @@ struct special_entry {
 #define FTRACE_STACK_ENTRIES	8
 
 struct stack_entry {
+	struct trace_entry	ent;
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ * ftrace_printk entry:
  */
-struct trace_entry {
-	char			type;
-	char			cpu;
-	char			flags;
-	char			preempt_count;
-	int			pid;
-	cycle_t			t;
-	union {
-		struct ftrace_entry		fn;
-		struct ctx_switch_entry		ctx;
-		struct special_entry		special;
-		struct stack_entry		stack;
-		struct mmiotrace_rw		mmiorw;
-		struct mmiotrace_map		mmiomap;
-	};
+struct print_entry {
+	struct trace_entry	ent;
+	unsigned long		ip;
+	char			buf[];
+};
+
+#define TRACE_OLD_SIZE		88
+
+struct trace_field_cont {
+	unsigned char		type;
+	/* Temporary till we get rid of this completely */
+	char			buf[TRACE_OLD_SIZE - 1];
+};
+
+struct trace_mmiotrace_rw {
+	struct trace_entry	ent;
+	struct mmiotrace_rw	rw;
 };
 
-#define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
+struct trace_mmiotrace_map {
+	struct trace_entry	ent;
+	struct mmiotrace_map	map;
+};
+
+struct trace_boot {
+	struct trace_entry	ent;
+	struct boot_trace	initcall;
+};
+
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ *  IRQS_OFF	- interrupts were disabled
+ *  NEED_RESCHED - reschedule is requested
+ *  HARDIRQ	- inside an interrupt handler
+ *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
+ */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_NEED_RESCHED		= 0x02,
+	TRACE_FLAG_HARDIRQ		= 0x04,
+	TRACE_FLAG_SOFTIRQ		= 0x08,
+	TRACE_FLAG_CONT			= 0x10,
+};
+
+#define TRACE_BUF_SIZE		1024
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +142,9 @@ struct trace_entry {
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	struct list_head	trace_pages;
 	atomic_t		disabled;
-	raw_spinlock_t		lock;
-	struct lock_class_key	lock_key;
 
 	/* these fields get copied into max-trace: */
-	unsigned		trace_head_idx;
-	unsigned		trace_tail_idx;
-	void			*trace_head; /* producer */
-	void			*trace_tail; /* consumer */
 	unsigned long		trace_idx;
 	unsigned long		overrun;
 	unsigned long		saved_latency;
@@ -124,6 +168,7 @@ struct trace_iterator;
  * They have on/off state as well:
  */
 struct trace_array {
+	struct ring_buffer	*buffer;
 	unsigned long		entries;
 	long			ctrl;
 	int			cpu;
@@ -132,6 +177,56 @@ struct trace_array {
 	struct trace_array_cpu	*data[NR_CPUS];
 };
 
+#define FTRACE_CMP_TYPE(var, type) \
+	__builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id)		\
+	if (FTRACE_CMP_TYPE(var, etype)) {		\
+		var = (typeof(var))(entry);		\
+		WARN_ON(id && (entry)->type != id);	\
+		break;					\
+	}
+
+/* Left undefined: causes a link error if no IF_ASSIGN type matched. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type is a verifier that the entry type is
+ * the same as the type being assigned. To add new types simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ *  Where "type" is the trace type that includes the trace_entry
+ *  as the "ent" item. And "id" is the trace identifier that is
+ *  used in the trace_type enum.
+ *
+ *  If the type can have more than one id, then use zero.
+ */
+#define trace_assign_type(var, ent)					\
+	do {								\
+		IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);	\
+		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
+		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct special_entry, 0);		\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
+			  TRACE_MMIO_RW);				\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
+			  TRACE_MMIO_MAP);				\
+		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);	\
+		__ftrace_bad_type();					\
+	} while (0)
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2	/* Relay to other output functions */
+};
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -152,7 +247,7 @@ struct tracer {
 	int			(*selftest)(struct tracer *trace,
 					    struct trace_array *tr);
 #endif
-	int			(*print_line)(struct trace_iterator *iter);
+	enum print_line_t	(*print_line)(struct trace_iterator *iter);
 	struct tracer		*next;
 	int			print_max;
 };
@@ -171,57 +266,58 @@ struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
 	void			*private;
-	long			last_overrun[NR_CPUS];
-	long			overrun[NR_CPUS];
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
 	int			cpu;
-
-	struct trace_entry	*prev_ent;
-	int			prev_cpu;
+	u64			ts;
 
 	unsigned long		iter_flags;
 	loff_t			pos;
-	unsigned long		next_idx[NR_CPUS];
-	struct list_head	*next_page[NR_CPUS];
-	unsigned		next_page_idx[NR_CPUS];
 	long			idx;
 };
 
-void tracing_reset(struct trace_array_cpu *data);
+void trace_wake_up(void);
+void tracing_reset(struct trace_array *tr, int cpu);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+						struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+				  unsigned long flags,
+				  int pc);
+
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
 			    unsigned long parent_ip,
-			    unsigned long flags);
+			    unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *prev,
 				struct task_struct *next,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *wakee,
 				struct task_struct *cur,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void trace_special(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long arg1,
 		   unsigned long arg2,
-		   unsigned long arg3);
+		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
 		    struct trace_array_cpu *data,
 		    unsigned long ip,
 		    unsigned long parent_ip,
-		    unsigned long flags);
+		    unsigned long flags, int pc);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
@@ -268,51 +364,33 @@ extern unsigned long ftrace_update_tot_cnt;
 extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
-#ifdef CONFIG_MMIOTRACE
-extern void __trace_mmiotrace_rw(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_rw *rw);
-extern void __trace_mmiotrace_map(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_map *map);
-#endif
-
 #ifdef CONFIG_FTRACE_STARTUP_TEST
-#ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
 					   struct trace_array *tr);
-#endif
-#ifdef CONFIG_IRQSOFF_TRACER
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
 					  struct trace_array *tr);
-#endif
-#ifdef CONFIG_PREEMPT_TRACER
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
 					     struct trace_array *tr);
-#endif
-#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
 						 struct trace_array *tr);
-#endif
-#ifdef CONFIG_SCHED_TRACER
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
 					 struct trace_array *tr);
-#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+extern int trace_selftest_startup_nop(struct tracer *trace,
+					 struct trace_array *tr);
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
-#ifdef CONFIG_SYSPROF_TRACER
 extern int trace_selftest_startup_sysprof(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+				 struct trace_iterator *iter);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
+extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
@@ -334,6 +412,9 @@ enum trace_iterator_flags {
 	TRACE_ITER_BLOCK		= 0x80,
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
+	TRACE_ITER_PRINTK		= 0x400,
 };
 
+extern struct tracer nop_trace;
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 00000000000..d0a5e50eeff
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static int trace_boot_enabled;
+
+
+/* Should be started after do_pre_smp_initcalls() in init/main.c */
+void start_boot_trace(void)
+{
+	trace_boot_enabled = 1;
+}
+
+void stop_boot_trace(void)
+{
+	trace_boot_enabled = 0;
+}
+
+void reset_boot_trace(struct trace_array *tr)
+{
+	stop_boot_trace();
+}
+
+static void boot_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	boot_trace = tr;
+
+	trace_boot_enabled = 0;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr, cpu);
+}
+
+static void boot_trace_ctrl_update(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		start_boot_trace();
+	else
+		stop_boot_trace();
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+	int ret;
+	struct trace_entry *entry = iter->ent;
+	struct trace_boot *field = (struct trace_boot *)entry;
+	struct boot_trace *it = &field->initcall;
+	struct trace_seq *s = &iter->seq;
+	struct timespec calltime = ktime_to_timespec(it->calltime);
+	struct timespec rettime = ktime_to_timespec(it->rettime);
+
+	if (entry->type == TRACE_BOOT) {
+		ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
+					  calltime.tv_sec,
+					  calltime.tv_nsec,
+					  it->func, it->caller);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+					  "returned %d after %lld msecs\n",
+					  rettime.tv_sec,
+					  rettime.tv_nsec,
+					  it->func, it->result, it->duration);
+
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_HANDLED;
+	}
+	return TRACE_TYPE_UNHANDLED;
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+	.name		= "initcall",
+	.init		= boot_trace_init,
+	.reset		= reset_boot_trace,
+	.ctrl_update	= boot_trace_ctrl_update,
+	.print_line	= initcall_print_line,
+};
+
+void trace_boot(struct boot_trace *it, initcall_t fn)
+{
+	struct ring_buffer_event *event;
+	struct trace_boot *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = boot_trace;
+
+	if (!trace_boot_enabled)
+		return;
+
+	/* Get its name now since this function could
+	 * disappear because it is in the .init section.
+	 */
+	sprint_symbol(it->func, (unsigned long)fn);
+	preempt_disable();
+	data = tr->data[smp_processor_id()];
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_BOOT;
+	entry->initcall = *it;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+
+ out:
+	preempt_enable();
+}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 31214489797..e90eb0c2c56 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_function_trace(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649f..a7db7f040ae 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	atomic_dec(&data->disabled);
 }
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
 	unsigned long latency, t0, t1;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
+	int pc;
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
 
 	local_save_flags(flags);
 
+	pc = preempt_count();
+
 	if (!report_latency(delta))
 		goto out;
 
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
 	if (!report_latency(delta))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 
 	latency = nsecs_to_usecs(delta);
 
@@ -173,8 +176,8 @@ out_unlock:
 out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
-	tracing_reset(data);
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	tracing_reset(tr, cpu);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
 static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	data->critical_start = parent_ip ? : ip;
-	tracing_reset(data);
+	tracing_reset(tr, cpu);
 
 	local_save_flags(flags);
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	per_cpu(tracing_cpu, cpu) = 1;
 
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!head_page(data)) ||
+	if (unlikely(!data) ||
 	    !data->critical_start || atomic_read(&data->disabled))
 		return;
 
 	atomic_inc(&data->disabled);
 
 	local_save_flags(flags);
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 	check_critical_timing(tr, data, parent_ip ? : ip, cpu);
 	data->critical_start = 0;
 	atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb..f28484618ff 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
 {
 	int cpu;
 	unsigned long cnt = 0;
+/* FIXME: iter->overrun[] no longer exists; overruns are not counted yet */
+#if 0
 	for_each_online_cpu(cpu) {
 		cnt += iter->overrun[cpu];
 		iter->overrun[cpu] = 0;
 	}
+#endif
+	(void)cpu;
 	return cnt;
 }
 
@@ -171,17 +175,21 @@ print_out:
 	return (ret == -EBUSY) ? 0 : ret;
 }
 
-static int mmio_print_rw(struct trace_iterator *iter)
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_rw *rw	= &entry->mmiorw;
+	struct trace_mmiotrace_rw *field;
+	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->mmiorw.opcode) {
+	trace_assign_type(field, entry);
+	rw = &field->rw;
+
+	switch (rw->opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
 			"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int mmio_print_map(struct trace_iterator *iter)
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= &entry->mmiomap;
+	struct trace_mmiotrace_map *field;
+	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
-	int ret = 1;
+	int ret;
 
-	switch (entry->mmiorw.opcode) {
+	trace_assign_type(field, entry);
+	m = &field->map;
+
+	switch (m->opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
 			"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	struct print_entry *print = (struct print_entry *)entry;
+	const char *msg		= print->buf;
+	struct trace_seq *s	= &iter->seq;
+	unsigned long long t	= ns2usecs(iter->ts);
+	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned secs		= (unsigned long)t;
+	int ret;
+
+	/* The trailing newline must be in the message. */
+	ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (entry->flags & TRACE_FLAG_CONT)
+		trace_seq_print_cont(s, iter);
+
+	return TRACE_TYPE_HANDLED;
 }
 
-/* return 0 to abort printing without consuming current entry in pipe mode */
-static int mmio_print_line(struct trace_iterator *iter)
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
 {
 	switch (iter->ent->type) {
 	case TRACE_MMIO_RW:
 		return mmio_print_rw(iter);
 	case TRACE_MMIO_MAP:
 		return mmio_print_map(iter);
+	case TRACE_PRINT:
+		return mmio_print_mark(iter);
 	default:
-		return 1; /* ignore unknown entries */
+		return TRACE_TYPE_HANDLED; /* ignore unknown entries */
 	}
 }
 
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
 }
 device_initcall(init_mmio_trace);
 
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_rw *rw)
+{
+	struct ring_buffer_event *event;
+	struct trace_mmiotrace_rw *entry;
+	unsigned long irq_flags;
+
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+	entry->ent.type			= TRACE_MMIO_RW;
+	entry->rw			= *rw;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_rw(struct mmiotrace_rw *rw)
 {
 	struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
 	__trace_mmiotrace_rw(tr, data, rw);
 }
 
+static void __trace_mmiotrace_map(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_map *map)
+{
+	struct ring_buffer_event *event;
+	struct trace_mmiotrace_map *entry;
+	unsigned long irq_flags;
+
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+	entry->ent.type			= TRACE_MMIO_MAP;
+	entry->map			= *map;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_mapping(struct mmiotrace_map *map)
 {
 	struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
 	__trace_mmiotrace_map(tr, data, map);
 	preempt_enable();
 }
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+	return trace_vprintk(0, fmt, args);
+}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 00000000000..4592b486251
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array	*ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void nop_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	ctx_trace = tr;
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr, cpu);
+
+	if (tr->ctrl)
+		start_nop_trace(tr);
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		stop_nop_trace(tr);
+}
+
+static void nop_trace_ctrl_update(struct trace_array *tr)
+{
+	/* start/stop are no-ops for the nop tracer; nothing is reset here */
+	if (tr->ctrl)
+		start_nop_trace(tr);
+	else
+		stop_nop_trace(tr);
+}
+
+struct tracer nop_trace __read_mostly =
+{
+	.name		= "nop",
+	.init		= nop_trace_init,
+	.reset		= nop_trace_reset,
+	.ctrl_update	= nop_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_nop,
+#endif
+};
+
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa..b8f56beb1a6 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -19,15 +19,16 @@ static int __read_mostly	tracer_enabled;
 static atomic_t			sched_ref;
 
 static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 			struct task_struct *next)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
 	int cpu;
+	int pc;
+
+	if (!atomic_read(&sched_ref))
+		return;
 
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
 	if (!tracer_enabled)
 		return;
 
+	pc = preempt_count();
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
+	data = ctx_trace->data[cpu];
 
-	if (likely(disabled == 1))
-		tracing_sched_switch_trace(tr, data, prev, next, flags);
+	if (likely(!atomic_read(&data->disabled)))
+		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	if (!atomic_read(&sched_ref))
-		return;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	sched_switch_func(probe_data, __rq, prev, next);
-}
-
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-			task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
-	int cpu;
+	int cpu, pc;
 
-	if (!tracer_enabled)
+	if (!likely(tracer_enabled))
 		return;
 
-	tracing_record_cmdline(curr);
+	pc = preempt_count();
+	tracing_record_cmdline(current);
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
+	data = ctx_trace->data[cpu];
 
-	if (likely(disabled == 1))
-		tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+	if (likely(!atomic_read(&data->disabled)))
+		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+					   flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_func(probe_data, __rq, task, curr);
-}
-
 static void sched_switch_reset(struct trace_array *tr)
 {
 	int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static int tracing_sched_register(void)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return ret;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&ctx_trace);
+	ret = register_trace_sched_switch(probe_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
 
 	return ret;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 	return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_switch(probe_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 }
 
 static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cd..fe4a252c236 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	long disabled;
 	int resched;
 	int cpu;
+	int pc;
 
 	if (likely(!wakeup_task))
 		return;
 
+	pc = preempt_count();
 	resched = need_resched();
 	preempt_disable_notrace();
 
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	if (task_cpu(wakeup_task) != cpu)
 		goto unlock;
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, pc);
 
  unlock:
 	__raw_spin_unlock(&wakeup_lock);
@@ -112,17 +114,18 @@ static int report_latency(cycle_t delta)
 }
 
 static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	struct task_struct *next)
 {
 	unsigned long latency = 0, t0 = 0, t1 = 0;
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
+
+	tracing_record_cmdline(prev);
 
 	if (unlikely(!tracer_enabled))
 		return;
@@ -139,12 +142,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	if (next != wakeup_task)
 		return;
 
+	pc = preempt_count();
+
 	/* The task we are waiting for is waking up */
-	data = tr->data[wakeup_cpu];
+	data = wakeup_trace->data[wakeup_cpu];
 
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (likely(disabled != 1))
 		goto out;
 
@@ -155,7 +160,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	t0 = nsecs_to_usecs(T0);
 	t1 = nsecs_to_usecs(T1);
 
-	update_max_tr(tr, wakeup_task, wakeup_cpu);
+	update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 
 out_unlock:
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 	__raw_spin_unlock(&wakeup_lock);
 	local_irq_restore(flags);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	tracing_record_cmdline(prev);
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	wakeup_sched_switch(probe_data, __rq, prev, next);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +196,7 @@ static void __wakeup_reset(struct trace_array *tr)
 
 	for_each_possible_cpu(cpu) {
 		data = tr->data[cpu];
-		tracing_reset(data);
+		tracing_reset(tr, cpu);
 	}
 
 	wakeup_cpu = -1;
@@ -240,19 +220,26 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
-		   struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	long disabled;
+	int pc;
+
+	if (likely(!tracer_enabled))
+		return;
+
+	tracing_record_cmdline(p);
+	tracing_record_cmdline(current);
 
 	if (likely(!rt_task(p)) ||
 			p->prio >= wakeup_prio ||
-			p->prio >= curr->prio)
+			p->prio >= current->prio)
 		return;
 
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	pc = preempt_count();
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (unlikely(disabled != 1))
 		goto out;
 
@@ -264,7 +251,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 		goto out_locked;
 
 	/* reset the trace */
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 
 	wakeup_cpu = task_cpu(p);
 	wakeup_prio = p->prio;
@@ -274,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
 	local_save_flags(flags);
 
-	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
-	trace_function(tr, tr->data[wakeup_cpu],
-		       CALLER_ADDR1, CALLER_ADDR2, flags);
+	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
+		       CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
 	__raw_spin_unlock(&wakeup_lock);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct trace_array **ptr = probe_data;
-	struct trace_array *tr = *ptr;
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_check_start(tr, task, curr);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void start_wakeup_tracer(struct trace_array *tr)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup_new(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&wakeup_trace);
+	ret = register_trace_sched_switch(probe_wakeup_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
@@ -363,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
 
 	return;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
 	unregister_ftrace_function(&trace_ops);
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_switch(probe_wakeup_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073b..09cf230d7ec 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_FN:
 	case TRACE_CTX:
 	case TRACE_WAKE:
+	case TRACE_CONT:
 	case TRACE_STACK:
+	case TRACE_PRINT:
 	case TRACE_SPECIAL:
 		return 1;
 	}
 	return 0;
 }
 
-static int
-trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
-	struct trace_entry *entries;
-	struct page *page;
-	int idx = 0;
-	int i;
+	struct ring_buffer_event *event;
+	struct trace_entry *entry;
 
-	BUG_ON(list_empty(&data->trace_pages));
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	entries = page_address(page);
+	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+		entry = ring_buffer_event_data(event);
 
-	check_pages(data);
-	if (head_page(data) != entries)
-		goto failed;
-
-	/*
-	 * The starting trace buffer always has valid elements,
-	 * if any element exists.
-	 */
-	entries = head_page(data);
-
-	for (i = 0; i < tr->entries; i++) {
-
-		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+		if (!trace_valid_entry(entry)) {
 			printk(KERN_CONT ".. invalid entry %d ",
-				entries[idx].type);
+				entry->type);
 			goto failed;
 		}
-
-		idx++;
-		if (idx >= ENTRIES_PER_PAGE) {
-			page = virt_to_page(entries);
-			if (page->lru.next == &data->trace_pages) {
-				if (i != tr->entries - 1) {
-					printk(KERN_CONT ".. entries buffer mismatch");
-					goto failed;
-				}
-			} else {
-				page = list_entry(page->lru.next, struct page, lru);
-				entries = page_address(page);
-			}
-			idx = 0;
-		}
 	}
-
-	page = virt_to_page(entries);
-	if (page->lru.next != &data->trace_pages) {
-		printk(KERN_CONT ".. too many entries");
-		goto failed;
-	}
-
 	return 0;
 
  failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 	/* Don't allow flipping of max traces now */
 	raw_local_irq_save(flags);
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_possible_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
-			continue;
 
-		cnt += tr->data[cpu]->trace_idx;
+	cnt = ring_buffer_entries(tr->buffer);
 
-		ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+	for_each_possible_cpu(cpu) {
+		ret = trace_test_buffer_cpu(tr, cpu);
 		if (ret)
 			break;
 	}
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 					   struct trace_array *tr,
 					   int (*func)(void))
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
 	char *func_name;
+	int ret;
 
 	/* The ftrace test PASSED */
 	printk(KERN_CONT "PASSED\n");
@@ -157,6 +119,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	/* enable tracing */
 	tr->ctrl = 1;
 	trace->init(tr);
+
 	/* Sleep for a 1/10 of a second */
 	msleep(100);
 
@@ -212,10 +175,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 int
 trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
+	int ret;
 
 	/* make sure msleep has been recorded */
 	msleep(1);
@@ -415,6 +378,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
 }
 #endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
 
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+	/* What could possibly go wrong? */
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SCHED_TRACER
 static int trace_wakeup_test_thread(void *data)
 {
@@ -486,6 +458,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 
 	wake_up_process(p);
 
+	/* give a little time to let the thread wake up */
+	msleep(100);
+
 	/* stop the tracing. */
 	tr->ctrl = 0;
 	trace->ctrl_update(tr);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 00000000000..74c5d9a3afa
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
+static struct stack_trace max_stack_trace = {
+	.max_entries		= STACK_TRACE_ENTRIES,
+	.entries		= stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+	unsigned long this_size, flags;
+	unsigned long *p, *top, *start;
+	int i;
+	/* bytes from this local to the top of its THREAD_SIZE-aligned block */
+	this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+	this_size = THREAD_SIZE - this_size;
+
+	if (this_size <= max_stack_size)
+		return;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+
+	/* recheck under the lock: a race could have already updated it */
+	if (this_size <= max_stack_size)
+		goto out;
+
+	max_stack_size = this_size;
+
+	max_stack_trace.nr_entries	= 0;
+	max_stack_trace.skip		= 3;
+
+	save_stack_trace(&max_stack_trace);
+
+	/*
+	 * Now find where in the stack these are, scanning from here to top.
+	 */
+	i = 0;
+	start = &this_size;
+	top = (unsigned long *)
+		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+	/*
+	 * Loop through all the entries. One of the entries may
+	 * for some reason be missed on the stack, so we may
+	 * have to account for them. If they are all there, this
+	 * loop will only happen once. This code only takes place
+	 * on a new max, so it is far from a fast path.
+	 */
+	while (i < max_stack_trace.nr_entries) {
+
+		stack_dump_index[i] = this_size;
+		p = start;
+
+		for (; p < top && i < max_stack_trace.nr_entries; p++) {
+			if (*p == stack_dump_trace[i]) {
+				this_size = stack_dump_index[i++] =
+					(top - p) * sizeof(unsigned long);
+				/* Start the search from here */
+				start = p + 1;
+			}
+		}
+
+		i++;
+	}
+
+ out:
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu, resched;
+
+	if (unlikely(!ftrace_enabled || stack_trace_disabled))
+		return;
+
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	cpu = raw_smp_processor_id();
+	/* no atomic needed, we only modify this variable by this cpu */
+	if (per_cpu(trace_active, cpu)++ != 0)
+		goto out;
+
+	check_stack();
+
+ out:
+	per_cpu(trace_active, cpu)--;
+	/* prevent recursion in schedule */
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+	.func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+		    size_t count, loff_t *ppos)
+{
+	unsigned long *ptr = filp->private_data;
+	char buf[64];
+	int r;
+
+	/* %lu: the value is unsigned; scnprintf() returns what it stored */
+	r = scnprintf(buf, sizeof(buf), "%lu\n", *ptr);
+
+	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+		     size_t count, loff_t *ppos)
+{
+	unsigned long *ptr = filp->private_data;
+	unsigned long val, flags;
+	char buf[64];
+	int ret;
+
+	if (count >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+	buf[count] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	/* same lock (irqs off) that check_stack() takes to update the max */
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+	*ptr = val;
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+
+	return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+	.open		= tracing_open_generic,
+	.read		= stack_max_size_read,
+	.write		= stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	long i = (long)m->private;
+
+	(*pos)++;
+
+	i++;
+
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
+		return NULL;
+
+	m->private = (void *)i;
+
+	return &m->private;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+	void *t = &m->private;
+	loff_t l = 0;
+
+	local_irq_disable();
+	__raw_spin_lock(&max_stack_lock);
+
+	for (; t && l < *pos; t = t_next(m, t, &l))
+		;
+
+	return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+	__raw_spin_unlock(&max_stack_lock);
+	local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, long i)
+{
+	unsigned long addr = stack_dump_trace[i];
+#ifdef CONFIG_KALLSYMS
+	char str[KSYM_SYMBOL_LEN];
+
+	sprint_symbol(str, addr);
+
+	return seq_printf(m, "%s\n", str);
+#else
+	return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+	long i = *(long *)v;
+	int size;
+
+	if (i < 0) {
+		seq_printf(m, "        Depth   Size      Location"
+			   "    (%d entries)\n"
+			   "        -----   ----      --------\n",
+			   max_stack_trace.nr_entries);
+		return 0;
+	}
+
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
+		return 0;
+
+	if (i+1 == max_stack_trace.nr_entries ||
+	    stack_dump_trace[i+1] == ULONG_MAX)
+		size = stack_dump_index[i];
+	else
+		size = stack_dump_index[i] - stack_dump_index[i+1];
+
+	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);
+
+	trace_lookup_stack(m, i);
+
+	return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &stack_trace_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = (void *)-1;
+	}
+
+	return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+	.open		= stack_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+				    &max_stack_size, &stack_max_size_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+				    NULL, &stack_trace_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+	register_ftrace_function(&trace_ops);
+
+	return 0;
+}
+
+device_initcall(stack_trace_init);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a13..9587d3bcba5 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_stack_trace(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 00000000000..f2b7c28a470
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+	struct hlist_node hlist;
+	void **funcs;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
+	char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct tracepoint_entry *entry = container_of(head,
+		struct tracepoint_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+	/* Nothing to free; an ERR_PTR() is a failed update, not an array */
+	if (!old || IS_ERR(old))
+		return;
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	smp_wmb();	/* write rcu_pending before calling the RCU callback */
+	call_rcu_sched(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+	int i;
+
+	if (!tracepoint_debug)
+		return;
+
+	for (i = 0; entry->funcs[i]; i++)
+		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0;
+	void **old, **new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->funcs;
+	if (old) {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes]; nr_probes++)
+			if (old[nr_probes] == probe)
+				return ERR_PTR(-EEXIST);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (old)
+		memcpy(new, old, nr_probes * sizeof(void *));
+	new[nr_probes] = probe;
+	entry->refcount = nr_probes + 1;
+	entry->funcs = new;
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
+ * Remove @probe (every probe if @probe is NULL) from @entry.  Returns the old
+ * array for the caller to free, NULL if there was none, or ERR_PTR(-ENOMEM).
+ */
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0, nr_del = 0, i, j = 0;
+	void **old, **new;
+
+	old = entry->funcs;
+	if (!old)
+		return NULL;
+	debug_print_probes(entry);
+	/* (N -> M), (N > 1, M >= 0) probes */
+	for (nr_probes = 0; old[nr_probes]; nr_probes++)
+		if ((!probe || old[nr_probes] == probe))
+			nr_del++;
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1); no debug print here, funcs is now NULL */
+		entry->funcs = NULL;
+		entry->refcount = 0;
+		return old;
+	}
+
+	/* N -> M, (N > 1, M > 0); + 1 for NULL */
+	new = kzalloc((nr_probes - nr_del + 1) * sizeof(void *), GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; old[i]; i++)
+		if ((probe && old[i] != probe))
+			new[j++] = old[i];
+	entry->refcount = nr_probes - nr_del;
+	entry->funcs = new;
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"tracepoint %s busy\n", name);
+			return ERR_PTR(-EEXIST);	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+	memcpy(&e->name[0], name, name_len);
+	e->funcs = NULL;
+	e->refcount = 0;
+	e->rcu_pending = 0;
+	hlist_add_head(&e->hlist, head);
+	return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static int remove_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+	if (e->refcount)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu_sched has been executed */
+	if (e->rcu_pending)
+		rcu_barrier_sched();
+	kfree(e);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+	struct tracepoint *elem, int active)
+{
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	/*
+	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+	 * probe callbacks array is consistent before setting a pointer to it.
+	 * This array is referenced by __DO_TRACE from
+	 * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
+	 * is used.
+	 */
+	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+	elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function ensures that the original callback is not used anymore. This is
+ * ensured by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+	elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{
+	struct tracepoint *iter;
+	struct tracepoint_entry *mark_entry;
+
+	mutex_lock(&tracepoints_mutex);
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_tracepoint(iter->name);
+		if (mark_entry) {
+			set_tracepoint(&mark_entry, iter,
+					!!mark_entry->refcount);
+		} else {
+			disable_tracepoint(iter);
+		}
+	}
+	mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+	/* Core kernel tracepoints */
+	tracepoint_update_probe_range(__start___tracepoints,
+		__stop___tracepoints);
+	/* tracepoints in modules. */
+	module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	int ret = 0;
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry) {
+		entry = add_tracepoint(name);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
+	}
+	/*
+	 * If we detect that a call_rcu_sched is pending for this tracepoint,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	old = tracepoint_entry_add_probe(entry, probe);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
+		goto end;
+	}
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	tracepoint_entry_free_old(entry, old);
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt disabled section
+ * has finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+	int ret = -ENOENT;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	old = tracepoint_entry_remove_probe(entry, probe);
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	tracepoint_entry_free_old(entry, old);
+	remove_tracepoint(name);	/* Ignore busy error message */
+	ret = 0;
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end)
+{
+	if (!*tracepoint && begin != end) {
+		*tracepoint = begin;
+		return 1;
+	}
+	if (*tracepoint >= begin && *tracepoint < end)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel tracepoints */
+	if (!iter->module) {
+		found = tracepoint_get_iter_range(&iter->tracepoint,
+				__start___tracepoints, __stop___tracepoints);
+		if (found)
+			goto end;
+	}
+	/* tracepoints in modules. */
+	found = module_get_iter_tracepoints(iter);
+end:
+	if (!found)
+		tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+	iter->tracepoint++;
+	/*
+	 * iter->tracepoint may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the tracepoints, getting the
+	 * tracepoints from following modules if necessary.
+	 */
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+	iter->module = NULL;
+	iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c2006bfeea4..1338469ac84 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -316,17 +316,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
 EXPORT_SYMBOL(bitmap_scnprintf);
 
 /**
- * bitmap_scnprintf_len - return buffer length needed to convert
- * bitmap to an ASCII hex string
- * @nr_bits: number of bits to be converted
- */
-int bitmap_scnprintf_len(unsigned int nr_bits)
-{
-	unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
-	return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
-}
-
-/**
  * __bitmap_parse - convert an ASCII hex string into a bitmap.
  * @buf: pointer to buffer containing string.
  * @buflen: buffer size in bytes.  If string is smaller than this
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index cceecb6a963..a013bbc2371 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
+#include <linux/ioport.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
@@ -550,18 +551,51 @@ static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int
 #endif
 }
 
+static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+{
+#ifndef IO_RSRC_PRINTK_SIZE
+#define IO_RSRC_PRINTK_SIZE	4
+#endif
+
+#ifndef MEM_RSRC_PRINTK_SIZE
+#define MEM_RSRC_PRINTK_SIZE	8
+#endif
+
+	/* room for the actual numbers, the two "0x", -, [, ] and the final zero */
+	char sym[4*sizeof(resource_size_t) + 8];
+	char *p = sym, *pend = sym + sizeof(sym);
+	int size = -1;
+
+	if (res->flags & IORESOURCE_IO)
+		size = IO_RSRC_PRINTK_SIZE;
+	else if (res->flags & IORESOURCE_MEM)
+		size = MEM_RSRC_PRINTK_SIZE;
+
+	*p++ = '[';
+	p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	*p++ = '-';
+	p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	*p++ = ']';
+	*p = 0;
+
+	return string(buf, end, sym, field_width, precision, flags);
+}
+
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
  * by an extra set of alphanumeric characters that are extended format
  * specifiers.
  *
- * Right now we just handle 'F' (for symbolic Function descriptor pointers)
- * and 'S' (for Symbolic direct pointers), but this can easily be
- * extended in the future (network address types etc).
+ * Right now we handle:
+ *
+ * - 'F' For symbolic function descriptor pointers
+ * - 'S' For symbolic direct pointers
+ * - 'R' For a struct resource pointer, it prints the range of
+ *       addresses (not the name nor the flags)
  *
- * The difference between 'S' and 'F' is that on ia64 and ppc64 function
- * pointers are really function descriptors, which contain a pointer the
- * real address. 
+ * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
+ * function pointers are really function descriptors, which contain a
+ * pointer to the real address.
  */
 static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
@@ -571,6 +605,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 		/* Fallthrough */
 	case 'S':
 		return symbol_string(buf, end, ptr, field_width, precision, flags);
+	case 'R':
+		return resource_string(buf, end, ptr, field_width, precision, flags);
 	}
 	flags |= SMALL;
 	if (field_width == -1) {
@@ -590,6 +626,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
  * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
  * be generated for the given input, excluding the trailing
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a501a4de95..5b5790f8a81 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
 
+config UNEVICTABLE_LRU
+	bool "Add LRU list to track non-evictable pages"
+	default y
+	depends on MMU
+	help
+	  Keeps unevictable pages off of the active and inactive pageout
+	  lists, so kswapd will not waste CPU time or have its balancing
+	  algorithms thrown off by scanning these pages.  Selecting this
+	  will use one page flag and increase the code size a little.
+	  Say Y unless you know what you are doing.
+
 config MMU_NOTIFIER
 	bool
diff --git a/mm/Makefile b/mm/Makefile
index da4ccf015ae..c06b45a1ff5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
-
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912..ab8553658af 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include "internal.h"
 
 /*
@@ -115,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	BUG_ON(page_mapped(page));
+	mem_cgroup_uncharge_cache_page(page);
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t offset, gfp_t gfp_mask)
 {
-	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0)
-		lru_cache_add(page);
+	int ret;
+
+	/*
+	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
+	 * before shmem_readpage has a chance to mark them as SwapBacked: they
+	 * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+	 * (called in add_to_page_cache) needs to know where they're going too.
+	 */
+	if (mapping_cap_swap_backed(mapping))
+		SetPageSwapBacked(page);
+
+	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+	if (ret == 0) {
+		if (page_is_file_cache(page))
+			lru_cache_add_file(page);
+		else
+			lru_cache_add_active_anon(page);
+	}
 	return ret;
 }
 
@@ -557,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * mechananism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
-	smp_mb__before_clear_bit();
-	if (!test_and_clear_bit(PG_locked, &page->flags))
-		BUG();
-	smp_mb__after_clear_bit(); 
+	VM_BUG_ON(!PageLocked(page));
+	clear_bit_unlock(PG_locked, &page->flags);
+	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 7881638e4a1..7d12ca70ef7 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -21,6 +21,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -215,15 +217,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 		spin_unlock(&mapping->i_mmap_lock);
 	}
 
+	if (vma->vm_flags & VM_LOCKED) {
+		/*
+		 * drop PG_Mlocked flag for over-mapped range
+		 */
+		unsigned int saved_flags = vma->vm_flags;
+		munlock_vma_pages_range(vma, start, start + size);
+		vma->vm_flags = saved_flags;
+	}
+
 	mmu_notifier_invalidate_range_start(mm, start, start + size);
 	err = populate_range(mm, vma, start, size, pgoff);
 	mmu_notifier_invalidate_range_end(mm, start, start + size);
 	if (!err && !(flags & MAP_NONBLOCK)) {
-		if (unlikely(has_write_lock)) {
-			downgrade_write(&mm->mmap_sem);
-			has_write_lock = 0;
+		if (vma->vm_flags & VM_LOCKED) {
+			/*
+			 * might be mapping previously unmapped range of file
+			 */
+			mlock_vma_pages_range(vma, start, start + size);
+		} else {
+			if (unlikely(has_write_lock)) {
+				downgrade_write(&mm->mmap_sem);
+				has_write_lock = 0;
+			}
+			make_pages_present(start, start+size);
 		}
-		make_pages_present(start, start+size);
 	}
 
 	/*
@@ -240,4 +258,3 @@ out:
 
 	return err;
 }
-
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93..ce8cbb29860 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -262,7 +262,7 @@ struct resv_map {
 	struct list_head regions;
 };
 
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
 {
 	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
 	if (!resv_map)
@@ -274,7 +274,7 @@ struct resv_map *resv_map_alloc(void)
 	return resv_map;
 }
 
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
 {
 	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
 
@@ -289,7 +289,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_SHARED))
 		return (struct resv_map *)(get_vma_private_data(vma) &
 							~HPAGE_RESV_MASK);
-	return 0;
+	return NULL;
 }
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
 {
 	struct hstate *h = &default_hstate;
 	return sprintf(buf,
-			"HugePages_Total: %5lu\n"
-			"HugePages_Free:  %5lu\n"
-			"HugePages_Rsvd:  %5lu\n"
-			"HugePages_Surp:  %5lu\n"
-			"Hugepagesize:    %5lu kB\n",
+			"HugePages_Total:   %5lu\n"
+			"HugePages_Free:    %5lu\n"
+			"HugePages_Rsvd:    %5lu\n"
+			"HugePages_Surp:    %5lu\n"
+			"Hugepagesize:   %8lu kB\n",
 			h->nr_huge_pages,
 			h->free_huge_pages,
 			h->resv_huge_pages,
@@ -1747,10 +1747,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-int unmap_ref_private(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					struct page *page,
-					unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+				struct page *page, unsigned long address)
 {
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
@@ -2073,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+	if (!ptep || write || shared)
+		return 0;
+	else
+		return huge_pte_none(huge_ptep_get(ptep));
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
@@ -2082,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
+	int zeropage_ok = 0;
+	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
@@ -2094,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (huge_zeropage_ok(pte, write, shared))
+			zeropage_ok = 1;
 
-		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		if (!pte ||
+		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
 		    (write && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
@@ -2115,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			get_page(page);
-			pages[i] = page + pfn_offset;
+			if (zeropage_ok)
+				pages[i] = ZERO_PAGE(0);
+			else
+				pages[i] = page + pfn_offset;
+			get_page(pages[i]);
 		}
 
 		if (vmas)
diff --git a/mm/internal.h b/mm/internal.h
index 1f43f741697..e4e728bdf32 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,6 +39,15 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+/*
+ * in mm/vmscan.c:
+ */
+extern int isolate_lru_page(struct page *page);
+extern void putback_lru_page(struct page *page);
+
+/*
+ * in mm/page_alloc.c
+ */
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
@@ -52,6 +61,120 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+extern long mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+extern void munlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * unevictable_migrate_page() called only from migrate_page_copy() to
+ * migrate unevictable flag to new page.
+ * Note that the old page has been isolated from the LRU lists at this
+ * point so we don't need to worry about LRU statistics.
+ */
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+	if (TestClearPageUnevictable(old))
+		SetPageUnevictable(new);
+}
+#else
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+}
+#endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Called only in fault path via page_evictable() for a new page
+ * to determine if it's being mapped into a LOCKED vma.
+ * If so, mark page as mlocked.
+ */
+static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
+{
+	VM_BUG_ON(PageLRU(page));
+
+	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
+		return 0;
+
+	if (!TestSetPageMlocked(page)) {
+		inc_zone_page_state(page, NR_MLOCK);
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+	}
+	return 1;
+}
+
+/*
+ * must be called with vma's mmap_sem held for read, and page locked.
+ */
+extern void mlock_vma_page(struct page *page);
+
+/*
+ * Clear the page's PageMlocked().  This can be useful in a situation where
+ * we want to unconditionally remove a page from the pagecache -- e.g.,
+ * on truncation or freeing.
+ *
+ * It is legal to call this function for any page, mlocked or not.
+ * If called for a page that is still mapped by mlocked vmas, all we do
+ * is revert to lazy LRU behaviour -- semantics are not broken.
+ */
+extern void __clear_page_mlock(struct page *page);
+static inline void clear_page_mlock(struct page *page)
+{
+	if (unlikely(TestClearPageMlocked(page)))
+		__clear_page_mlock(page);
+}
+
+/*
+ * mlock_migrate_page - called only from migrate_page_copy() to
+ * migrate the Mlocked page flag; update statistics.
+ */
+static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+{
+	if (TestClearPageMlocked(page)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__dec_zone_page_state(page, NR_MLOCK);
+		SetPageMlocked(newpage);
+		__inc_zone_page_state(newpage, NR_MLOCK);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * free_page_mlock() -- clean up attempts to free an mlocked page.
+ * Page should not be on lru, so no need to fix that up.
+ * free_pages_check() will verify...
+ */
+static inline void free_page_mlock(struct page *page)
+{
+	if (unlikely(TestClearPageMlocked(page))) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__dec_zone_page_state(page, NR_MLOCK);
+		__count_vm_event(UNEVICTABLE_MLOCKFREED);
+		local_irq_restore(flags);
+	}
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
+{
+	return 0;
+}
+static inline void clear_page_mlock(struct page *page) { }
+static inline void mlock_vma_page(struct page *page) { }
+static inline void mlock_migrate_page(struct page *new, struct page *old) { }
+static inline void free_page_mlock(struct page *page) { }
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
 /*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
@@ -120,4 +243,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
+#define GUP_FLAGS_WRITE                  0x1
+#define GUP_FLAGS_FORCE                  0x2
+#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
+		     struct page **pages, struct vm_area_struct **vmas);
+
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36896f3eb7f..d4a92b63e98 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,11 +32,12 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
 		enum mem_cgroup_stat_index idx, int val)
 {
-	int cpu = smp_processor_id();
-	stat->cpustat[cpu].count[idx] += val;
+	stat->count[idx] += val;
 }
 
 static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 /*
  * per-zone information in memory controller.
  */
-
-enum mem_cgroup_zstat_index {
-	MEM_CGROUP_ZSTAT_ACTIVE,
-	MEM_CGROUP_ZSTAT_INACTIVE,
-
-	NR_MEM_CGROUP_ZSTAT,
-};
-
 struct mem_cgroup_per_zone {
 	/*
 	 * spin_lock to protect the per cgroup LRU
 	 */
 	spinlock_t		lru_lock;
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long count[NR_MEM_CGROUP_ZSTAT];
+	struct list_head	lists[NR_LRU_LISTS];
+	unsigned long		count[NR_LRU_LISTS];
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
 };
 static struct mem_cgroup init_mem_cgroup;
 
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock.  We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin).  But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT 	0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK 	(1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK	0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-	struct list_head lru;		/* per cgroup LRU list */
-	struct page *page;
-	struct mem_cgroup *mem_cgroup;
-	int flags;
-};
-#define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
-	return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
-	return page_zonenum(pc->page);
-}
-
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	NR_CHARGE_TYPE,
+};
+
+/* Shorthand flag masks, used only in this file (for easier reading). */
+#define PCGF_CACHE	(1UL << PCG_CACHE)
+#define PCGF_USED	(1UL << PCG_USED)
+#define PCGF_ACTIVE	(1UL << PCG_ACTIVE)
+#define PCGF_LOCK	(1UL << PCG_LOCK)
+#define PCGF_FILE	(1UL << PCG_FILE)
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+	PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+	PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+	PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+	0, /* FORCE */
 };
 
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
-					bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+					 struct page_cgroup *pc,
+					 bool charge)
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
 
 	VM_BUG_ON(!irqs_disabled());
-	if (flags & PAGE_CGROUP_FLAG_CACHE)
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+
+	cpustat = &stat->cpustat[smp_processor_id()];
+	if (PageCgroupCache(pc))
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
 	else
-		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
 
 	if (charge)
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
 	else
-		__mem_cgroup_stat_add_safe(stat,
+		__mem_cgroup_stat_add_safe(cpustat,
 				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
 }
 
 static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
-					enum mem_cgroup_zstat_index idx)
+					enum lru_list idx)
 {
 	int nid, zid;
 	struct mem_cgroup_per_zone *mz;
@@ -262,85 +236,77 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 				struct mem_cgroup, css);
 }
 
-static inline int page_cgroup_locked(struct page *page)
-{
-	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
-	VM_BUG_ON(!page_cgroup_locked(page));
-	page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
-	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
-	return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
-	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 			struct page_cgroup *pc)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+	int lru = LRU_BASE;
+
+	if (PageCgroupUnevictable(pc))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageCgroupActive(pc))
+			lru += LRU_ACTIVE;
+		if (PageCgroupFile(pc))
+			lru += LRU_FILE;
+	}
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
 	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
 				struct page_cgroup *pc)
 {
-	int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-
-	if (!to) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-		list_add(&pc->lru, &mz->inactive_list);
-	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-		list_add(&pc->lru, &mz->active_list);
+	int lru = LRU_BASE;
+
+	if (PageCgroupUnevictable(pc))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageCgroupActive(pc))
+			lru += LRU_ACTIVE;
+		if (PageCgroupFile(pc))
+			lru += LRU_FILE;
 	}
-	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_add(&pc->lru, &mz->lists[lru]);
+
+	mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
-	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
 	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+	int active    = PageCgroupActive(pc);
+	int file      = PageCgroupFile(pc);
+	int unevictable = PageCgroupUnevictable(pc);
+	enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+				(LRU_FILE * !!file + !!active);
 
-	if (from)
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-	else
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+	if (lru == from)
+		return;
 
-	if (active) {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-		pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->active_list);
+	MEM_CGROUP_ZSTAT(mz, from) -= 1;
+	/*
+	 * Although this is done under mz->lru_lock, other flags, which
+	 * are not related to LRU, can be modified outside of the lock.
+	 * We have to use atomic set/clear flag operations.
+	 */
+	if (is_unevictable_lru(lru)) {
+		ClearPageCgroupActive(pc);
+		SetPageCgroupUnevictable(pc);
 	} else {
-		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-		list_move(&pc->lru, &mz->inactive_list);
+		if (is_active_lru(lru))
+			SetPageCgroupActive(pc);
+		else
+			ClearPageCgroupActive(pc);
+		ClearPageCgroupUnevictable(pc);
 	}
+
+	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	list_move(&pc->lru, &mz->lists[lru]);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -356,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
@@ -372,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	 * safely get to page_cgroup without it, so just try_lock it:
 	 * mem_cgroup_isolate_pages allows for page left on wrong list.
 	 */
-	if (!try_lock_page_cgroup(page))
+	pc = lookup_page_cgroup(page);
+	if (!trylock_page_cgroup(pc))
 		return;
-
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	if (pc && PageCgroupUsed(pc)) {
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_move_lists(pc, active);
+		__mem_cgroup_move_lists(pc, lru);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 }
 
 /*
@@ -403,21 +368,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 }
 
 /*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-	unsigned long active, inactive;
-	/* active and inactive are the number of pages. 'long' is ok.*/
-	active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
-	inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
-	return (long) (active / (inactive + 1));
-}
-
-/*
  * prev_priority control...this will be used in memory reclaim path.
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -444,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  * (see include/linux/mmzone.h)
  */
 
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-				   struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru)
 {
-	long nr_active;
+	long nr_pages;
 	int nid = zone->zone_pgdat->node_id;
 	int zid = zone_idx(zone);
 	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
-	nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
-	return (nr_active >> priority);
-}
+	nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
 
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	long nr_inactive;
-	int nid = zone->zone_pgdat->node_id;
-	int zid = zone_idx(zone);
-	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
-
-	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-	return (nr_inactive >> priority);
+	return (nr_pages >> priority);
 }
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -473,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
 	unsigned long nr_taken = 0;
 	struct page *page;
@@ -484,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
+	int lru = LRU_FILE * !!file + !!active;
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	if (active)
-		src = &mz->active_list;
-	else
-		src = &mz->inactive_list;
-
+	src = &mz->lists[lru];
 
 	spin_lock(&mz->lru_lock);
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
 		if (scan >= nr_to_scan)
 			break;
+		if (unlikely(!PageCgroupUsed(pc)))
+			continue;
 		page = pc->page;
 
 		if (unlikely(!PageLRU(page)))
 			continue;
 
-		if (PageActive(page) && !active) {
-			__mem_cgroup_move_lists(pc, true);
-			continue;
-		}
-		if (!PageActive(page) && active) {
-			__mem_cgroup_move_lists(pc, false);
+		/*
+		 * TODO: play better with lumpy reclaim, grabbing anything.
+		 */
+		if (PageUnevictable(page) ||
+		    (PageActive(page) && !active) ||
+		    (!PageActive(page) && active)) {
+			__mem_cgroup_move_lists(pc, page_lru(page));
 			continue;
 		}
 
 		scan++;
 		list_move(&pc->lru, &pc_list);
 
-		if (__isolate_lru_page(page, mode) == 0) {
+		if (__isolate_lru_page(page, mode, file) == 0) {
 			list_move(&page->lru, dst);
 			nr_taken++;
 		}
@@ -540,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
-	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
 
-	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-	if (unlikely(pc == NULL))
-		goto err;
-
+	pc = lookup_page_cgroup(page);
+	/* can happen at boot */
+	if (unlikely(!pc))
+		return 0;
+	prefetchw(pc);
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
+
 	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem)) {
 			rcu_read_unlock();
-			kmem_cache_free(page_cgroup_cache, pc);
 			return 0;
 		}
 		/*
@@ -572,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		css_get(&memcg->css);
 	}
 
-	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto out;
 
@@ -595,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		}
 	}
 
-	pc->mem_cgroup = mem;
-	pc->page = page;
-	/*
-	 * If a page is accounted as a page cache, insert to inactive list.
-	 * If anon, insert to active list.
-	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-		pc->flags = PAGE_CGROUP_FLAG_CACHE;
-	else
-		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
-	lock_page_cgroup(page);
-	if (unlikely(page_get_page_cgroup(page))) {
-		unlock_page_cgroup(page);
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-		kmem_cache_free(page_cgroup_cache, pc);
+
 		goto done;
 	}
-	page_assign_page_cgroup(page, pc);
+	pc->mem_cgroup = mem;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
+	pc->flags = pcg_default_flags[ctype];
 
 	mz = page_cgroup_zoneinfo(pc);
+
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	unlock_page_cgroup(page);
 done:
 	return 0;
 out:
 	css_put(&mem->css);
-	kmem_cache_free(page_cgroup_cache, pc);
-err:
 	return -ENOMEM;
 }
 
@@ -635,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * If already mapped, we don't have to account.
 	 * If page cache, page->mapping has address_space.
@@ -656,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
-
+	if (PageCompound(page))
+		return 0;
 	/*
 	 * Corner case handling. This is called from add_to_page_cache()
 	 * in usual. But some FS (shmem) precharges this page before calling it
@@ -669,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!(gfp_mask & __GFP_WAIT)) {
 		struct page_cgroup *pc;
 
-		lock_page_cgroup(page);
-		pc = page_get_page_cgroup(page);
-		if (pc) {
-			VM_BUG_ON(pc->page != page);
-			VM_BUG_ON(!pc->mem_cgroup);
-			unlock_page_cgroup(page);
+
+		pc = lookup_page_cgroup(page);
+		if (!pc)
+			return 0;
+		lock_page_cgroup(pc);
+		if (PageCgroupUsed(pc)) {
+			unlock_page_cgroup(pc);
 			return 0;
 		}
-		unlock_page_cgroup(page);
+		unlock_page_cgroup(pc);
 	}
 
 	if (unlikely(!mm))
 		mm = &init_mm;
 
-	return mem_cgroup_charge_common(page, mm, gfp_mask,
+	if (page_is_file_cache(page))
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+	else
+		return mem_cgroup_charge_common(page, mm, gfp_mask,
+				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
 /*
@@ -704,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	/*
 	 * Check if our page_cgroup is valid
 	 */
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (unlikely(!pc))
-		goto unlock;
-
-	VM_BUG_ON(pc->page != page);
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc || !PageCgroupUsed(pc)))
+		return;
 
-	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
-		|| page_mapped(page)))
-		goto unlock;
+	lock_page_cgroup(pc);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+	     || !PageCgroupUsed(pc)) {
+		/* This happens on a race in zap_pte_range() and do_swap_page() */
+		unlock_page_cgroup(pc);
+		return;
+	}
+	ClearPageCgroupUsed(pc);
+	mem = pc->mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_remove_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	unlock_page_cgroup(pc);
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
-
-	mem = pc->mem_cgroup;
 	res_counter_uncharge(&mem->res, PAGE_SIZE);
 	css_put(&mem->css);
 
-	kmem_cache_free(page_cgroup_cache, pc);
 	return;
-unlock:
-	unlock_page_cgroup(page);
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
 {
+	/* early check. */
+	if (page_mapped(page))
+		return;
+	if (page->mapping && !PageAnon(page))
+		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
+	VM_BUG_ON(page->mapping);
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
@@ -758,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (pc) {
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		if (PageCgroupCache(pc)) {
+			if (page_is_file_cache(page))
+				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+			else
+				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		}
 	}
-	unlock_page_cgroup(page);
+	unlock_page_cgroup(pc);
 	if (mem) {
 		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
 			ctype, mem);
@@ -791,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
 	 */
 	if (!newpage->mapping)
 		__mem_cgroup_uncharge_common(newpage,
-					 MEM_CGROUP_CHARGE_TYPE_FORCE);
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
 	else if (PageAnon(newpage))
 		mem_cgroup_uncharge_page(newpage);
 }
@@ -863,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
 #define FORCE_UNCHARGE_BATCH	(128)
 static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 			    struct mem_cgroup_per_zone *mz,
-			    int active)
+			    enum lru_list lru)
 {
 	struct page_cgroup *pc;
 	struct page *page;
@@ -871,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 	unsigned long flags;
 	struct list_head *list;
 
-	if (active)
-		list = &mz->active_list;
-	else
-		list = &mz->inactive_list;
+	list = &mz->lists[lru];
 
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	while (!list_empty(list)) {
 		pc = list_entry(list->prev, struct page_cgroup, lru);
 		page = pc->page;
+		if (!PageCgroupUsed(pc))
+			break;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
 		/*
@@ -894,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 				count = FORCE_UNCHARGE_BATCH;
 				cond_resched();
 			}
-		} else
-			cond_resched();
+		} else {
+			spin_lock_irqsave(&mz->lru_lock, flags);
+			break;
+		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -919,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	while (mem->res.usage > 0) {
 		if (atomic_read(&mem->css.cgroup->count) > 0)
 			goto out;
+		/* Drain so that all *used* pages are on the LRU. */
+		lru_add_drain_all();
 		for_each_node_state(node, N_POSSIBLE)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				struct mem_cgroup_per_zone *mz;
+				enum lru_list l;
 				mz = mem_cgroup_zoneinfo(mem, node, zid);
-				/* drop all page_cgroup in active_list */
-				mem_cgroup_force_empty_list(mem, mz, 1);
-				/* drop all page_cgroup in inactive_list */
-				mem_cgroup_force_empty_list(mem, mz, 0);
+				for_each_lru(l)
+					mem_cgroup_force_empty_list(mem, mz, l);
 			}
+		cond_resched();
 	}
 	ret = 0;
 out:
@@ -1012,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 	/* showing # of active pages */
 	{
-		unsigned long active, inactive;
-
-		inactive = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_INACTIVE);
-		active = mem_cgroup_get_all_zonestat(mem_cont,
-						MEM_CGROUP_ZSTAT_ACTIVE);
-		cb->fill(cb, "active", (active) * PAGE_SIZE);
-		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+		unsigned long active_anon, inactive_anon;
+		unsigned long active_file, inactive_file;
+		unsigned long unevictable;
+
+		inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_ANON);
+		active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_ANON);
+		inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_INACTIVE_FILE);
+		active_file = mem_cgroup_get_all_zonestat(mem_cont,
+						LRU_ACTIVE_FILE);
+		unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+							LRU_UNEVICTABLE);
+
+		cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+		cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+		cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+		cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+		cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
 	}
 	return 0;
 }
@@ -1062,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup_per_zone *mz;
+	enum lru_list l;
 	int zone, tmp = node;
 	/*
 	 * This routine is called against possible nodes.
@@ -1082,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		INIT_LIST_HEAD(&mz->active_list);
-		INIT_LIST_HEAD(&mz->inactive_list);
 		spin_lock_init(&mz->lru_lock);
+		for_each_lru(l)
+			INIT_LIST_HEAD(&mz->lists[l]);
 	}
 	return 0;
 }
@@ -1124,8 +1088,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	int node;
 
 	if (unlikely((cont->parent) == NULL)) {
+		page_cgroup_init();
 		mem = &init_mem_cgroup;
-		page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
 	} else {
 		mem = mem_cgroup_alloc();
 		if (!mem)
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f49..164951c4730 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1129,12 +1129,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, int len, int write, int force,
+
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
 		struct page **pages, struct vm_area_struct **vmas)
 {
 	int i;
-	unsigned int vm_flags;
+	unsigned int vm_flags = 0;
+	int write = !!(flags & GUP_FLAGS_WRITE);
+	int force = !!(flags & GUP_FLAGS_FORCE);
+	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	if (len <= 0)
 		return 0;
@@ -1158,7 +1163,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pud_t *pud;
 			pmd_t *pmd;
 			pte_t *pte;
-			if (write) /* user gate pages are read-only */
+
+			/* user gate pages are read-only */
+			if (!ignore && write)
 				return i ? : -EFAULT;
 			if (pg > TASK_SIZE)
 				pgd = pgd_offset_k(pg);
@@ -1190,8 +1197,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
-				|| !(vm_flags & vma->vm_flags))
+		if (!vma ||
+		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+		    (!ignore && !(vm_flags & vma->vm_flags)))
 			return i ? : -EFAULT;
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1274,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	} while (len);
 	return i;
 }
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int len, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = 0;	/* GUP_FLAGS_* bits passed on to __get_user_pages() */
+
+	if (write)	/* non-zero write selects GUP_FLAGS_WRITE */
+		flags |= GUP_FLAGS_WRITE;
+	if (force)	/* non-zero force selects GUP_FLAGS_FORCE */
+		flags |= GUP_FLAGS_FORCE;
+	/* GUP_FLAGS_IGNORE_VMA_PERMISSIONS is never set on this path. */
+	return __get_user_pages(tsk, mm,
+				start, len, flags,
+				pages, vmas);
+}
+
 EXPORT_SYMBOL(get_user_pages);
 
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1296,18 +1321,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
-	if (retval)
-		goto out;
-
 	retval = -EINVAL;
 	if (PageAnon(page))
-		goto out_uncharge;
+		goto out;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
-		goto out_uncharge;
+		goto out;
 	retval = -EBUSY;
 	if (!pte_none(*pte))
 		goto out_unlock;
@@ -1323,8 +1344,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	return retval;
 out_unlock:
 	pte_unmap_unlock(pte, ptl);
-out_uncharge:
-	mem_cgroup_uncharge_page(page);
 out:
 	return retval;
 }
@@ -1858,6 +1877,15 @@ gotten:
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (!new_page)
 		goto oom;
+	/*
+	 * Don't let another task, with possibly unlocked vma,
+	 * keep the mlocked page.
+	 */
+	if (vma->vm_flags & VM_LOCKED) {
+		lock_page(old_page);	/* for LRU manipulation */
+		clear_page_mlock(old_page);
+		unlock_page(old_page);
+	}
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
@@ -1886,11 +1914,13 @@ gotten:
 		 * thread doing COW.
 		 */
 		ptep_clear_flush_notify(vma, address, page_table);
-		set_pte_at(mm, address, page_table, entry);
-		update_mmu_cache(vma, address, entry);
-		lru_cache_add_active(new_page);
+		SetPageSwapBacked(new_page);
+		lru_cache_add_active_or_unevictable(new_page, vma);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+		set_pte_at(mm, address, page_table, entry);
+		update_mmu_cache(vma, address, entry);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -2288,16 +2318,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
+	mark_page_accessed(page);
+
+	lock_page(page);
+	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+
 	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 		ret = VM_FAULT_OOM;
+		unlock_page(page);
 		goto out;
 	}
 
-	mark_page_accessed(page);
-	lock_page(page);
-	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-
 	/*
 	 * Back out if somebody else already faulted in this pte.
 	 */
@@ -2324,7 +2355,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_add_anon_rmap(page, vma, address);
 
 	swap_free(entry);
-	if (vm_swap_full())
+	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		remove_exclusive_swap_page(page);
 	unlock_page(page);
 
@@ -2382,7 +2413,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 	inc_mm_counter(mm, anon_rss);
-	lru_cache_add_active(page);
+	SetPageSwapBacked(page);
+	lru_cache_add_active_or_unevictable(page, vma);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2423,6 +2455,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	pte_t entry;
 	int anon = 0;
+	int charged = 0;
 	struct page *dirty_page = NULL;
 	struct vm_fault vmf;
 	int ret;
@@ -2463,6 +2496,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				ret = VM_FAULT_OOM;
 				goto out;
 			}
+			if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+				ret = VM_FAULT_OOM;
+				page_cache_release(page);
+				goto out;
+			}
+			charged = 1;
+			/*
+			 * Don't let another task, with possibly unlocked vma,
+			 * keep the mlocked page.
+			 */
+			if (vma->vm_flags & VM_LOCKED)
+				clear_page_mlock(vmf.page);
 			copy_user_highpage(page, vmf.page, address, vma);
 			__SetPageUptodate(page);
 		} else {
@@ -2497,11 +2542,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	}
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-		ret = VM_FAULT_OOM;
-		goto out;
-	}
-
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	/*
@@ -2520,11 +2560,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
-                        inc_mm_counter(mm, anon_rss);
-                        lru_cache_add_active(page);
-                        page_add_new_anon_rmap(page, vma, address);
+			inc_mm_counter(mm, anon_rss);
+			SetPageSwapBacked(page);
+			lru_cache_add_active_or_unevictable(page, vma);
+			page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(page);
@@ -2533,11 +2573,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				get_page(dirty_page);
 			}
 		}
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
 	} else {
-		mem_cgroup_uncharge_page(page);
+		if (charged)
+			mem_cgroup_uncharge_page(page);
 		if (anon)
 			page_cache_release(page);
 		else
@@ -2772,19 +2815,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
 	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
 	ret = get_user_pages(current, current->mm, addr,
 			len, write, 0, NULL, NULL);
-	if (ret < 0) {
-		/*
-		   SUS require strange return value to mlock
-		    - invalid addr generate to ENOMEM.
-		    - out of memory should generate EAGAIN.
-		*/
-		if (ret == -EFAULT)
-			ret = -ENOMEM;
-		else if (ret == -ENOMEM)
-			ret = -EAGAIN;
+	if (ret < 0)
 		return ret;
-	}
-	return ret == len ? 0 : -ENOMEM;
+	return ret == len ? 0 : -EFAULT;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 89fee2dcb03..6837a101437 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
+#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 
@@ -323,11 +324,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
 	BUG_ON(nr_pages % PAGES_PER_SECTION);
 
-	release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
-
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		release_mem_region(pfn << PAGE_SHIFT,
+				   PAGES_PER_SECTION << PAGE_SHIFT);
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
@@ -657,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * We can skip free pages. And we can only deal with pages on
 		 * LRU.
 		 */
-		ret = isolate_lru_page(page, &source);
+		ret = isolate_lru_page(page);
 		if (!ret) { /* Success */
+			list_add_tail(&page->lru, &source);
 			move_pages--;
 		} else {
 			/* Becasue we don't have big zone->lock. we should
@@ -849,10 +851,19 @@ failed_removal:
 
 	return ret;
 }
+
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+
+	start_pfn = PFN_DOWN(start);	/* page frame number of start */
+	end_pfn = start_pfn + PFN_DOWN(size);	/* start_pfn + pages in size */
+	return offline_pages(start_pfn, end_pfn, 120 * HZ); /* NOTE(review): 120 * HZ is presumably a timeout - confirm */
+}
 #else
 int remove_memory(u64 start, u64 size)
 {
 	return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
+EXPORT_SYMBOL_GPL(remove_memory);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83369058ec1..36f42573a33 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,6 +93,8 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	/*
 	 * Avoid migrating a page that is shared with others.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
-		isolate_lru_page(page, pagelist);
+	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+		if (!isolate_lru_page(page)) {
+			list_add_tail(&page->lru, pagelist);
+		}
+	}
 }
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
@@ -2197,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 	if (PageSwapCache(page))
 		md->swapcache++;
 
-	if (PageActive(page))
+	if (PageActive(page) || PageUnevictable(page))
 		md->active++;
 
 	if (PageWriteback(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23b..6602941bfab 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *  -EBUSY: page not on LRU list
- *  0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-	int ret = -EBUSY;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
-			ret = 0;
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-			list_add_tail(&page->lru, pagelist);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-	return ret;
-}
-
-/*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
  */
@@ -83,23 +53,9 @@ int migrate_prep(void)
 	return 0;
 }
 
-static inline void move_to_lru(struct page *page)
-{
-	if (PageActive(page)) {
-		/*
-		 * lru_cache_add_active checks that
-		 * the PG_active bit is off.
-		 */
-		ClearPageActive(page);
-		lru_cache_add_active(page);
-	} else {
-		lru_cache_add(page);
-	}
-	put_page(page);
-}
-
 /*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto unevictable list.
  *
  * returns the number of pages put back.
  */
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 		count++;
 	}
 	return count;
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	spin_unlock_irq(&mapping->tree_lock);
-	if (!PageSwapCache(newpage))
-		mem_cgroup_uncharge_cache_page(page);
 
 	return 0;
 }
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
+	int anon;
+
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		SetPageReferenced(newpage);
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
-	if (PageActive(page))
+	if (TestClearPageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
+	} else
+		unevictable_migrate_page(newpage, page);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		__set_page_dirty_nobuffers(newpage);
  	}
 
+	mlock_migrate_page(newpage, page);
+
 #ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
 #endif
-	ClearPageActive(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
+	/* page->mapping contains a flag for PageAnon() */
+	anon = PageAnon(page);
 	page->mapping = NULL;
 
+	if (!anon) /* This page was removed from radix-tree. */
+		mem_cgroup_uncharge_cache_page(page);
+
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * The new page will have replaced the old page if this function
  * is successful.
+ *
+ * Return value:
+ *   < 0 - error code
+ *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page)
 {
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	/* Prepare mapping for the new page.*/
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
+	if (PageSwapBacked(page))
+		SetPageSwapBacked(newpage);
 
 	mapping = page_mapping(page);
 	if (!mapping)
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
-	if (page_count(page) == 1)
+	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
+	}
 
 	charge = mem_cgroup_prepare_migration(page, newpage);
 	if (charge == -ENOMEM) {
@@ -730,7 +702,6 @@ rcu_unlock:
 		rcu_read_unlock();
 
 unlock:
-
 	unlock_page(page);
 
 	if (rc != -EAGAIN) {
@@ -741,17 +712,19 @@ unlock:
  		 * restored.
  		 */
  		list_del(&page->lru);
- 		move_to_lru(page);
+		putback_lru_page(page);
 	}
 
 move_newpage:
 	if (!charge)
 		mem_cgroup_end_migration(newpage);
+
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
 	 */
-	move_to_lru(newpage);
+	putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
  * Move a set of pages as indicated in the pm array. The addr
  * field must be set to the virtual address of the page to be moved
  * and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
  */
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
-				int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+				      struct page_to_node *pm,
+				      int migrate_all)
 {
 	int err;
 	struct page_to_node *pp;
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 				!migrate_all)
 			goto put_and_set;
 
-		err = isolate_lru_page(page, &pagelist);
+		err = isolate_lru_page(page);
+		if (!err)
+			list_add_tail(&page->lru, &pagelist);
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
@@ -926,36 +903,118 @@ set_status:
 		pp->status = err;
 	}
 
+	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
 				(unsigned long)pm);
-	else
-		err = -ENOENT;
 
 	up_read(&mm->mmap_sem);
 	return err;
 }
 
 /*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Migrate an array of page addresses onto an array of nodes and fill
+ * the corresponding status array.
  */
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+			 unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 const int __user *nodes,
+			 int __user *status, int flags)
+{
+	struct page_to_node *pm = NULL;
+	nodemask_t task_nodes;
+	int err = 0;
+	unsigned long i;	/* same type as nr_pages, which bounds it */
+
+	task_nodes = cpuset_mems_allowed(task);
+
+	/* Limit nr_pages so that the multiplication may not overflow */
+	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+		err = -E2BIG;
+		goto out;
+	}
+
+	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); /* +1: end marker */
+	if (!pm) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Get parameters from user space and initialize the pm
+	 * array. Return various errors if the user did something wrong.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+
+		err = -EFAULT;	/* result if either get_user() below fails */
+		if (get_user(p, pages + i))
+			goto out_pm;
+
+		pm[i].addr = (unsigned long)p;
+		if (nodes) {
+			int node;
+
+			if (get_user(node, nodes + i))
+				goto out_pm;
+
+			err = -ENODEV;
+			if (!node_state(node, N_HIGH_MEMORY))
+				goto out_pm;
+
+			err = -EACCES;
+			if (!node_isset(node, task_nodes))
+				goto out_pm;
+
+			pm[i].node = node;
+		} else
+			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
+	}
+	/* End marker */
+	pm[nr_pages].node = MAX_NUMNODES;
+
+	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+	if (err >= 0)
+		/* Return status information */
+		for (i = 0; i < nr_pages; i++)
+			if (put_user(pm[i].status, status + i))
+				err = -EFAULT;	/* keep copying the rest */
+
+out_pm:
+	vfree(pm);
+out:
+	return err;
+}
+
+/*
+ * Determine the nodes of an array of pages and store them in the status array.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 int __user *status)
+{
+	unsigned long i;
+	int err;
+
 	down_read(&mm->mmap_sem);
 
-	for ( ; pm->node != MAX_NUMNODES; pm++) {
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
 		struct vm_area_struct *vma;
 		struct page *page;
-		int err;
 
 		err = -EFAULT;
-		vma = find_vma(mm, pm->addr);
+		if (get_user(p, pages+i))
+			goto out;
+		addr = (unsigned long) p;
+
+		vma = find_vma(mm, addr);
 		if (!vma)
 			goto set_status;
 
-		page = follow_page(vma, pm->addr, 0);
+		page = follow_page(vma, addr, 0);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
 
 		err = page_to_nid(page);
 set_status:
-		pm->status = err;
+		put_user(err, status+i);
 	}
+	err = 0;
 
+out:
 	up_read(&mm->mmap_sem);
-	return 0;
+	return err;
 }
 
 /*
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
-	int err = 0;
-	int i;
 	struct task_struct *task;
-	nodemask_t task_nodes;
 	struct mm_struct *mm;
-	struct page_to_node *pm = NULL;
+	int err;
 
 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	    (current->uid != task->suid) && (current->uid != task->uid) &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
-		goto out2;
+		goto out;
 	}
 
  	err = security_task_movememory(task);
  	if (err)
- 		goto out2;
-
-
-	task_nodes = cpuset_mems_allowed(task);
-
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out2;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
-		goto out2;
-	}
-
-	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
-	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
-
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out;
-
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
-			int node;
-
-			if (get_user(node, nodes + i))
-				goto out;
-
-			err = -ENODEV;
-			if (!node_state(node, N_HIGH_MEMORY))
-				goto out;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out;
+		goto out;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
+	if (nodes) {
+		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+				    flags);
+	} else {
+		err = do_pages_stat(mm, nr_pages, pages, status);
 	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
-
-	if (nodes)
-		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	else
-		err = do_pages_stat(mm, pm);
-
-	if (err >= 0)
-		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
-				err = -EFAULT;
 
 out:
-	vfree(pm);
-out2:
 	mmput(mm);
 	return err;
 }
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93eff5..008ea70b7af 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -8,10 +8,18 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/rmap.h>
+#include <linux/mmzone.h>
+#include <linux/hugetlb.h>
+
+#include "internal.h"
 
 int can_do_mlock(void)
 {
@@ -23,17 +31,381 @@ int can_do_mlock(void)
 }
 EXPORT_SYMBOL(can_do_mlock);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Mlocked pages are marked with PageMlocked() flag for efficient testing
+ * in vmscan and, possibly, the fault path; and to support semi-accurate
+ * statistics.
+ *
+ * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
+ * be placed on the LRU "unevictable" list, rather than the [in]active lists.
+ * The unevictable list is an LRU sibling list to the [in]active lists.
+ * PageUnevictable is set to indicate the unevictable state.
+ *
+ * When lazy mlocking via vmscan, it is important to ensure that the
+ * vma's VM_LOCKED status is not concurrently being modified, otherwise we
+ * may have mlocked a page that is being munlocked. So lazy mlock must take
+ * the mmap_sem for read, and verify that the vma really is locked
+ * (see mm/rmap.c).
+ */
+
+/*
+ *  LRU accounting for clear_page_mlock()
+ */
+void __clear_page_mlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (!page->mapping) {	/* truncated ? */
+		return;
+	}
+
+	dec_zone_page_state(page, NR_MLOCK);
+	count_vm_event(UNEVICTABLE_PGCLEARED);
+	if (!isolate_lru_page(page)) {
+		putback_lru_page(page);
+	} else {
+		/*
+		 * Page not on the LRU yet.  Flush all pagevecs and retry.
+		 */
+		lru_add_drain_all();
+		if (!isolate_lru_page(page))
+			putback_lru_page(page);
+		else if (PageUnevictable(page))
+			count_vm_event(UNEVICTABLE_PGSTRANDED);
+
+	}
+}
+
+/*
+ * Mark page as mlocked if not already.
+ * If page on LRU, isolate and putback to move to unevictable list.
+ */
+void mlock_vma_page(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!TestSetPageMlocked(page)) {
+		inc_zone_page_state(page, NR_MLOCK);
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+		if (!isolate_lru_page(page))
+			putback_lru_page(page);
+	}
+}
+
+/*
+ * called from munlock()/munmap() path with page supposedly on the LRU.
+ *
+ * Note:  unlike mlock_vma_page(), we can't just clear the PageMlocked
+ * [in try_to_munlock()] and then attempt to isolate the page.  We must
+ * isolate the page to keep others from messing with its unevictable
+ * and mlocked state while trying to munlock.  However, we pre-clear the
+ * mlocked state anyway as we might lose the isolation race and we might
+ * not get another chance to clear PageMlocked.  If we successfully
+ * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
+ * mapping the page, it will restore the PageMlocked state, unless the page
+ * is mapped in a non-linear vma.  So, we go ahead and SetPageMlocked(),
+ * perhaps redundantly.
+ * If we lose the isolation race, and the page is mapped by other VM_LOCKED
+ * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
+ * either of which will restore the PageMlocked state by calling
+ * mlock_vma_page() above, if it can grab the vma's mmap sem.
+ */
+static void munlock_vma_page(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (TestClearPageMlocked(page)) {
+		dec_zone_page_state(page, NR_MLOCK);
+		if (!isolate_lru_page(page)) {
+			int ret = try_to_munlock(page);
+			/*
+			 * did try_to_munlock() succeed or punt?
+			 */
+			if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
+				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+			putback_lru_page(page);
+		} else {
+			/*
+			 * We lost the race.  let try_to_unmap() deal
+			 * with it.  At least we get the page state and
+			 * mlock stats right.  However, page is still on
+			 * the unevictable list.  We'll fix that up when
+			 * the page is eventually freed or we scan the
+			 * unevictable list.
+			 */
+			if (PageUnevictable(page))
+				count_vm_event(UNEVICTABLE_PGSTRANDED);
+			else
+				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		}
+	}
+}
+
+/**
+ * __mlock_vma_pages_range() -  mlock/munlock a range of pages in the vma.
+ * @vma:   target vma
+ * @start: start address
+ * @end:   end address
+ * @mlock: 0 indicate munlock, otherwise mlock.
+ *
+ * If @mlock == 0, unlock an mlocked range;
+ * else mlock the range of pages.  This takes care of making the pages present,
+ * too.
+ *
+ * return 0 on success, negative error code on error.
+ *
+ * vma->vm_mm->mmap_sem must be held for at least read.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end,
+				   int mlock)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr = start;
+	struct page *pages[16]; /* 16 gives a reasonable batch */
+	int nr_pages = (end - start) / PAGE_SIZE;
+	int ret;
+	int gup_flags = 0;
+
+	VM_BUG_ON(start & ~PAGE_MASK);
+	VM_BUG_ON(end   & ~PAGE_MASK);
+	VM_BUG_ON(start < vma->vm_start);
+	VM_BUG_ON(end   > vma->vm_end);
+	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
+		  (atomic_read(&mm->mm_users) != 0));
+
+	/*
+	 * mlock:   don't page populate if page has PROT_NONE permission.
+	 * munlock: always munlock the pages, even if
+	 *          they have PROT_NONE permission.
+	 */
+	if (!mlock)
+		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+
+	if (vma->vm_flags & VM_WRITE)
+		gup_flags |= GUP_FLAGS_WRITE;
+
+	lru_add_drain_all();	/* push cached pages to LRU */
+
+	while (nr_pages > 0) {
+		int i;
+
+		cond_resched();
+
+		/*
+		 * get_user_pages makes pages present if we are
+		 * setting mlock. and this extra reference count will
+		 * disable migration of this page.  However, page may
+		 * still be truncated out from under us.
+		 */
+		ret = __get_user_pages(current, mm, addr,
+				min_t(int, nr_pages, ARRAY_SIZE(pages)),
+				gup_flags, pages, NULL);
+		/*
+		 * This can happen for, e.g., VM_NONLINEAR regions before
+		 * a page has been allocated and mapped at a given offset,
+		 * or for addresses that map beyond end of a file.
+		 * We'll mlock the pages if/when they get faulted in.
+		 */
+		if (ret < 0)
+			break;
+		if (ret == 0) {
+			/*
+			 * We know the vma is there, so the only time
+			 * we cannot get a single page should be an
+			 * error (ret < 0) case.
+			 */
+			WARN_ON(1);
+			break;
+		}
+
+		lru_add_drain();	/* push cached pages to LRU */
+
+		for (i = 0; i < ret; i++) {
+			struct page *page = pages[i];
+
+			lock_page(page);
+			/*
+			 * Because we lock page here and migration is blocked
+			 * by the elevated reference, we need only check for
+			 * page truncation (file-cache only).
+			 */
+			if (page->mapping) {
+				if (mlock)
+					mlock_vma_page(page);
+				else
+					munlock_vma_page(page);
+			}
+			unlock_page(page);
+			put_page(page);		/* ref from get_user_pages() */
+
+			/*
+			 * here we assume that get_user_pages() has given us
+			 * a list of virtually contiguous pages.
+			 */
+			addr += PAGE_SIZE;	/* for next get_user_pages() */
+			nr_pages--;
+		}
+		ret = 0;
+	}
+
+	lru_add_drain_all();	/* to update stats */
+
+	return ret;	/* count entire vma as locked_vm */
+}
+
+/*
+ * convert get_user_pages() return value to posix mlock() error
+ */
+static int __mlock_posix_error_return(long retval)
+{
+	if (retval == -EFAULT)
+		retval = -ENOMEM;
+	else if (retval == -ENOMEM)
+		retval = -EAGAIN;
+	return retval;
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+/*
+ * Just make pages present if VM_LOCKED.  No-op if unlocking.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end,
+				   int mlock)
+{
+	if (mlock && (vma->vm_flags & VM_LOCKED))
+		return make_pages_present(start, end);
+	return 0;
+}
+
+static inline int __mlock_posix_error_return(long retval)
+{
+	return 0;
+}
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+/**
+ * mlock_vma_pages_range() - mlock pages in specified vma range.
+ * @vma - the vma containing the specified address range
+ * @start - starting address in @vma to mlock
+ * @end   - end address [+1] in @vma to mlock
+ *
+ * For mmap()/mremap()/expansion of mlocked vma.
+ *
+ * return 0 on success for "normal" vmas.
+ *
+ * return number of pages [> 0] to be removed from locked_vm on success
+ * of "special" vmas.
+ *
+ * return negative error if vma spanning @start-@end disappears while
+ * mmap semaphore is dropped.  Unlikely?
+ */
+long mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int nr_pages = (end - start) / PAGE_SIZE;
+	BUG_ON(!(vma->vm_flags & VM_LOCKED));
+
+	/*
+	 * filter unlockable vmas
+	 */
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+		goto no_mlock;
+
+	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+			is_vm_hugetlb_page(vma) ||
+			vma == get_gate_vma(current))) {
+		long error;
+		downgrade_write(&mm->mmap_sem);
+
+		error = __mlock_vma_pages_range(vma, start, end, 1);
+
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, start);
+		/* non-NULL vma must contain @start, but need to check @end */
+		if (!vma ||  end > vma->vm_end)
+			return -ENOMEM;
+
+		return 0;	/* hide other errors from mmap(), et al */
+	}
+
+	/*
+	 * User mapped kernel pages or huge pages:
+	 * make these pages present to populate the ptes, but
+	 * fall thru' to reset VM_LOCKED--no need to unlock, and
+	 * return nr_pages so these don't get counted against task's
+	 * locked limit.  huge pages are already counted against
+	 * locked vm limit.
+	 */
+	make_pages_present(start, end);
+
+no_mlock:
+	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
+	return nr_pages;		/* error or pages NOT mlocked */
+}
+
+
+/*
+ * munlock_vma_pages_range() - munlock all pages in the vma range.
+ * @vma - vma containing range to be munlock()ed.
+ * @start - start address in @vma of the range
+ * @end - end of range in @vma.
+ *
+ *  For mremap(), munmap() and exit().
+ *
+ * Called with @vma VM_LOCKED.
+ *
+ * Returns with VM_LOCKED cleared.  Callers must be prepared to
+ * deal with this.
+ *
+ * We don't save and restore VM_LOCKED here because pages are
+ * still on lru.  In unmap path, pages might be scanned by reclaim
+ * and re-mlocked by try_to_{munlock|unmap} before we unmap and
+ * free them.  This will result in freeing mlocked pages.
+ */
+void munlock_vma_pages_range(struct vm_area_struct *vma,
+			   unsigned long start, unsigned long end)
+{
+	vma->vm_flags &= ~VM_LOCKED;
+	__mlock_vma_pages_range(vma, start, end, 0);
+}
+
+/*
+ * mlock_fixup  - handle mlock[all]/munlock[all] requests.
+ *
+ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
+ * munlock is a no-op.  However, for some special vmas, we go ahead and
+ * populate the ptes via make_pages_present().
+ *
+ * For vmas that pass the filters, merge/split as appropriate.
+ */
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
 {
-	struct mm_struct * mm = vma->vm_mm;
+	struct mm_struct *mm = vma->vm_mm;
 	pgoff_t pgoff;
-	int pages;
+	int nr_pages;
 	int ret = 0;
-
-	if (newflags == vma->vm_flags) {
-		*prev = vma;
-		goto out;
+	int lock = newflags & VM_LOCKED;
+
+	if (newflags == vma->vm_flags ||
+			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;	/* don't set VM_LOCKED,  don't count */
+
+	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+			is_vm_hugetlb_page(vma) ||
+			vma == get_gate_vma(current)) {
+		if (lock)
+			make_pages_present(start, end);
+		goto out;	/* don't set VM_LOCKED,  don't count */
 	}
 
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -44,8 +416,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		goto success;
 	}
 
-	*prev = vma;
-
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
@@ -60,24 +430,61 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 
 success:
 	/*
+	 * Keep track of amount of locked VM.
+	 */
+	nr_pages = (end - start) >> PAGE_SHIFT;
+	if (!lock)
+		nr_pages = -nr_pages;
+	mm->locked_vm += nr_pages;
+
+	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 * It's okay if try_to_unmap_one unmaps a page just after we
-	 * set VM_LOCKED, make_pages_present below will bring it back.
+	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
 	vma->vm_flags = newflags;
 
-	/*
-	 * Keep track of amount of locked VM.
-	 */
-	pages = (end - start) >> PAGE_SHIFT;
-	if (newflags & VM_LOCKED) {
-		pages = -pages;
-		if (!(newflags & VM_IO))
-			ret = make_pages_present(start, end);
+	if (lock) {
+		/*
+		 * mmap_sem is currently held for write.  Downgrade the write
+		 * lock to a read lock so that other faults, mmap scans, ...
+		 * can proceed while we fault in all pages.
+		 */
+		downgrade_write(&mm->mmap_sem);
+
+		ret = __mlock_vma_pages_range(vma, start, end, 1);
+
+		/*
+		 * Need to reacquire mmap sem in write mode, as our callers
+		 * expect this.  We have no support for atomically upgrading
+		 * a sem to write, so we need to check for ranges while sem
+		 * is unlocked.
+		 */
+		up_read(&mm->mmap_sem);
+		/* vma can change or disappear */
+		down_write(&mm->mmap_sem);
+		*prev = find_vma(mm, start);
+		/* non-NULL *prev must contain @start, but need to check @end */
+		if (!(*prev) || end > (*prev)->vm_end)
+			ret = -ENOMEM;
+		else if (ret > 0) {
+			mm->locked_vm -= ret;
+			ret = 0;
+		} else
+			ret = __mlock_posix_error_return(ret); /* translate if needed */
+	} else {
+		/*
+		 * TODO:  for unlocking, pages will already be resident, so
+		 * we don't need to wait for allocations/reclaim/pagein, ...
+		 * However, unlocking a very large region can still take a
+		 * while.  Should we downgrade the semaphore for both lock
+		 * AND unlock ?
+		 */
+		__mlock_vma_pages_range(vma, start, end, 0);
 	}
 
-	mm->locked_vm -= pages;
 out:
+	*prev = vma;
 	return ret;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index e7a5a68a9c2..74f4d158022 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,7 +410,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 }
 
-static inline void __vma_link_file(struct vm_area_struct *vma)
+static void __vma_link_file(struct vm_area_struct *vma)
 {
 	struct file * file;
 
@@ -662,8 +662,6 @@ again:			remove_next = 1 + (end > next->vm_end);
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
-
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
@@ -972,6 +970,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 			return -EPERM;
 		vm_flags |= VM_LOCKED;
 	}
+
 	/* mlock MCL_FUTURE? */
 	if (vm_flags & VM_LOCKED) {
 		unsigned long locked, lock_limit;
@@ -1139,10 +1138,12 @@ munmap_back:
 	 * The VM_SHARED test is necessary because shmem_zero_setup
 	 * will create the file object for a shared anonymous map below.
 	 */
-	if (!file && !(vm_flags & VM_SHARED) &&
-	    vma_merge(mm, prev, addr, addr + len, vm_flags,
-					NULL, NULL, pgoff, NULL))
-		goto out;
+	if (!file && !(vm_flags & VM_SHARED)) {
+		vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+					NULL, NULL, pgoff, NULL);
+		if (vma)
+			goto out;
+	}
 
 	/*
 	 * Determine the object being mapped and call the appropriate
@@ -1224,10 +1225,14 @@ out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
-	}
-	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+		/*
+		 * makes pages present; downgrades, drops, reacquires mmap_sem
+		 */
+		long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
+		if (nr_pages < 0)
+			return nr_pages;	/* vma gone! */
+		mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
 		make_pages_present(addr, addr + len);
 	return addr;
 
@@ -1586,7 +1591,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
 #ifndef CONFIG_IA64
-static inline
+static
 #endif
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1636,7 +1641,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static inline int expand_downwards(struct vm_area_struct *vma,
+static int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	int error;
@@ -1698,10 +1703,12 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	vma = find_vma_prev(mm, addr, &prev);
 	if (vma && (vma->vm_start <= addr))
 		return vma;
-	if (!prev || expand_stack(prev, addr))
+	if (expand_stack(prev, addr))
 		return NULL;
-	if (prev->vm_flags & VM_LOCKED)
-		make_pages_present(addr, prev->vm_end);
+	if (prev->vm_flags & VM_LOCKED) {
+		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
+			return NULL;	/* vma gone! */
+	}
 	return prev;
 }
 #else
@@ -1727,8 +1734,10 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;
-	if (vma->vm_flags & VM_LOCKED)
-		make_pages_present(addr, start);
+	if (vma->vm_flags & VM_LOCKED) {
+		if (mlock_vma_pages_range(vma, addr, start) < 0)
+			return NULL;	/* vma gone! */
+	}
 	return vma;
 }
 #endif
@@ -1747,8 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 		long nrpages = vma_pages(vma);
 
 		mm->total_vm -= nrpages;
-		if (vma->vm_flags & VM_LOCKED)
-			mm->locked_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
 	} while (vma);
@@ -1914,6 +1921,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	vma = prev? prev->vm_next: mm->mmap;
 
 	/*
+	 * unlock any mlock()ed ranges before detaching vmas
+	 */
+	if (mm->locked_vm) {
+		struct vm_area_struct *tmp = vma;
+		while (tmp && tmp->vm_start < end) {
+			if (tmp->vm_flags & VM_LOCKED) {
+				mm->locked_vm -= vma_pages(tmp);
+				munlock_vma_pages_all(tmp);
+			}
+			tmp = tmp->vm_next;
+		}
+	}
+
+	/*
 	 * Remove the vma's, and unmap the actual pages
 	 */
 	detach_vmas_to_be_unmapped(mm, vma, prev, end);
@@ -2025,8 +2046,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 
 	/* Can we just expand an old private anonymous mapping? */
-	if (vma_merge(mm, prev, addr, addr + len, flags,
-					NULL, NULL, pgoff, NULL))
+	vma = vma_merge(mm, prev, addr, addr + len, flags,
+					NULL, NULL, pgoff, NULL);
+	if (vma)
 		goto out;
 
 	/*
@@ -2048,8 +2070,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED) {
-		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		if (!mlock_vma_pages_range(vma, addr, addr + len))
+			mm->locked_vm += (len >> PAGE_SHIFT);
 	}
 	return addr;
 }
@@ -2060,7 +2082,7 @@ EXPORT_SYMBOL(do_brk);
 void exit_mmap(struct mm_struct *mm)
 {
 	struct mmu_gather *tlb;
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	unsigned long end;
 
@@ -2068,6 +2090,15 @@ void exit_mmap(struct mm_struct *mm)
 	arch_exit_mmap(mm);
 	mmu_notifier_release(mm);
 
+	if (mm->locked_vm) {
+		vma = mm->mmap;
+		while (vma) {
+			if (vma->vm_flags & VM_LOCKED)
+				munlock_vma_pages_all(vma);
+			vma = vma->vm_next;
+		}
+	}
+	vma = mm->mmap;
 	lru_add_drain();
 	flush_cache_mm(mm);
 	tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1a7743923c8..58a2908f42f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -24,6 +24,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -238,8 +240,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		if (new_len > old_len)
-			make_pages_present(new_addr + old_len,
-					   new_addr + new_len);
+			mlock_vma_pages_range(new_vma, new_addr + old_len,
+						       new_addr + new_len);
 	}
 
 	return new_addr;
@@ -379,7 +381,7 @@ unsigned long do_mremap(unsigned long addr,
 			vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
 			if (vma->vm_flags & VM_LOCKED) {
 				mm->locked_vm += pages;
-				make_pages_present(addr + old_len,
+				mlock_vma_pages_range(vma, addr + old_len,
 						   addr + new_len);
 			}
 			ret = addr;
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fb..2696b24f2bb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
 	return PAGE_SIZE << compound_order(page);
 }
 
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- *   slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-	unsigned long start, int len, int write, int force,
-	struct page **pages, struct vm_area_struct **vmas)
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, int flags,
+		struct page **pages, struct vm_area_struct **vmas)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
 	int i;
+	int write = !!(flags & GUP_FLAGS_WRITE);
+	int force = !!(flags & GUP_FLAGS_FORCE);
+	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	/* calculate required read or write permissions.
 	 * - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 		/* protect what we can, including chardevs */
 		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
-		    !(vm_flags & vma->vm_flags))
+		    (!ignore && !(vm_flags & vma->vm_flags)))
 			goto finish_or_fault;
 
 		if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 finish_or_fault:
 	return i ? : -EFAULT;
 }
+
+
+/*
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end incrementing the page count of a
+ *   slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+	unsigned long start, int len, int write, int force,
+	struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = 0;
+
+	if (write)
+		flags |= GUP_FLAGS_WRITE;
+	if (force)
+		flags |= GUP_FLAGS_FORCE;
+
+	return __get_user_pages(tsk, mm,
+				start, len, flags,
+				pages, vmas);
+}
 EXPORT_SYMBOL(get_user_pages);
 
 DEFINE_RWLOCK(vmlist_lock);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b40f6d5f8fe..2970e35fd03 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES)
-			+ zone_page_state(z, NR_INACTIVE)
-			+ zone_page_state(z, NR_ACTIVE);
+		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES)
-		+ global_page_state(NR_INACTIVE)
-		+ global_page_state(NR_ACTIVE);
+	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9eb9eb92828..d0a240fbb8b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-	void *pc = page_get_page_cgroup(page);
-
 	printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
 		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
 		current->comm, page, (int)(2*sizeof(unsigned long)),
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
-	if (pc) {
-		printk(KERN_EMERG "cgroup:%p\n", pc);
-		page_reset_bad_cgroup(page);
-	}
+
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
 		KERN_EMERG "Backtrace:\n");
 	dump_stack();
@@ -454,14 +449,16 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+	free_page_mlock(page);
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
+	if (PageSwapBacked(page))
+		__ClearPageSwapBacked(page);
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
 	 * clear it, and do not free the page.  But we shall soon need
@@ -600,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
 		bad_page(page);
@@ -614,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+			| 1 << PG_mlocked
+#endif
+			);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
@@ -1862,10 +1862,21 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+		" inactive_file:%lu"
+//TODO:  check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+		" unevictable:%lu"
+#endif
+		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-		global_page_state(NR_ACTIVE),
-		global_page_state(NR_INACTIVE),
+		global_page_state(NR_ACTIVE_ANON),
+		global_page_state(NR_ACTIVE_FILE),
+		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		global_page_state(NR_UNEVICTABLE),
+#endif
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1899,13 @@ void show_free_areas(void)
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
-			" active:%lukB"
-			" inactive:%lukB"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+			" unevictable:%lukB"
+#endif
 			" present:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -1899,8 +1915,13 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
-			K(zone_page_state(zone, NR_ACTIVE)),
-			K(zone_page_state(zone, NR_INACTIVE)),
+			K(zone_page_state(zone, NR_ACTIVE_ANON)),
+			K(zone_page_state(zone, NR_INACTIVE_ANON)),
+			K(zone_page_state(zone, NR_ACTIVE_FILE)),
+			K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+			K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3410,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	pgdat->nr_zones = 0;
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	pgdat->kswapd_max_order = 0;
+	pgdat_page_cgroup_init(pgdat);
 	
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
+		enum lru_list l;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-				"%s zone: %lu pages used for memmap\n",
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
 				zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
@@ -3439,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		/* Account for reserved pages */
 		if (j == 0 && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-					"%s zone: %lu pages reserved\n",
+			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
@@ -3465,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->prev_priority = DEF_PRIORITY;
 
 		zone_pcp_init(zone);
-		INIT_LIST_HEAD(&zone->active_list);
-		INIT_LIST_HEAD(&zone->inactive_list);
-		zone->nr_scan_active = 0;
-		zone->nr_scan_inactive = 0;
+		for_each_lru(l) {
+			INIT_LIST_HEAD(&zone->lru[l].list);
+			zone->lru[l].nr_scan = 0;
+		}
+		zone->recent_rotated[0] = 0;
+		zone->recent_rotated[1] = 0;
+		zone->recent_scanned[0] = 0;
+		zone->recent_scanned[1] = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
@@ -4210,7 +4236,7 @@ void setup_per_zone_pages_min(void)
 	for_each_zone(zone) {
 		u64 tmp;
 
-		spin_lock_irqsave(&zone->lru_lock, flags);
+		spin_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->present_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
@@ -4242,13 +4268,53 @@ void setup_per_zone_pages_min(void)
 		zone->pages_low   = zone->pages_min + (tmp >> 2);
 		zone->pages_high  = zone->pages_min + (tmp >> 1);
 		setup_zone_migrate_reserve(zone);
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
+		spin_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	/* update totalreserve_pages */
 	calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned int gb, ratio;
+
+		/* Zone size in gigabytes */
+		gb = zone->present_pages >> (30 - PAGE_SHIFT);
+		ratio = int_sqrt(10 * gb);
+		if (!ratio)
+			ratio = 1;
+
+		zone->inactive_ratio = ratio;
+	}
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4286,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 65536;
 	setup_per_zone_pages_min();
 	setup_per_zone_lowmem_reserve();
+	setup_per_zone_inactive_ratio();
 	return 0;
 }
 module_init(init_per_zone_pages_min)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
new file mode 100644
index 00000000000..5d86550701f
--- /dev/null
+++ b/mm/page_cgroup.c
@@ -0,0 +1,237 @@
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/bit_spinlock.h>
+#include <linux/page_cgroup.h>
+#include <linux/hash.h>
+#include <linux/memory.h>
+
+static void __meminit
+__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+{
+	pc->flags = 0;
+	pc->mem_cgroup = NULL;
+	pc->page = pfn_to_page(pfn);
+}
+static unsigned long total_usage;
+
+#if !defined(CONFIG_SPARSEMEM)
+
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+	pgdat->node_page_cgroup = NULL;
+}
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long offset;
+	struct page_cgroup *base;
+
+	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+	if (unlikely(!base))
+		return NULL;	/* this node has no page_cgroup table */
+	/* The node's table is indexed relative to node_start_pfn. */
+	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
+	return base + offset;
+}
+
+static int __init alloc_node_page_cgroup(int nid)
+{
+	struct page_cgroup *base, *pc;
+	unsigned long table_size;
+	unsigned long start_pfn, nr_pages, index;
+
+	start_pfn = NODE_DATA(nid)->node_start_pfn;
+	nr_pages = NODE_DATA(nid)->node_spanned_pages;
+	/* One page_cgroup per pfn spanned by the node. */
+	table_size = sizeof(struct page_cgroup) * nr_pages;
+	/* Allocate from bootmem; a NULL result is reported as -ENOMEM. */
+	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!base)
+		return -ENOMEM;
+	for (index = 0; index < nr_pages; index++) {
+		pc = base + index;
+		__init_page_cgroup(pc, start_pfn + index);
+	}
+	NODE_DATA(nid)->node_page_cgroup = base;
+	total_usage += table_size;	/* bytes, reported by page_cgroup_init() */
+	return 0;
+}
+
+void __init page_cgroup_init(void)
+{
+
+	int nid, fail;
+
+	for_each_online_node(nid)  {
+		fail = alloc_node_page_cgroup(nid);
+		if (fail)
+			goto fail;
+	}
+	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+	printk(KERN_INFO "please try cgroup_disable=memory option if you"
+	" don't want\n");
+	return;
+fail:
+	printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
+	printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
+	panic("Out of memory");
+}
+
+#else /* CONFIG_SPARSEMEM */
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	struct mem_section *section = __pfn_to_section(pfn);
+
+	return section->page_cgroup + pfn;	/* stored biased by -pfn */
+}
+
+int __meminit init_section_page_cgroup(unsigned long pfn)
+{
+	struct mem_section *section;
+	struct page_cgroup *base, *pc;
+	unsigned long table_size;
+	int nid, index;
+
+	section = __pfn_to_section(pfn);
+	/* Allocate the section's page_cgroup table unless one already exists. */
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	/* One entry per page of the section; try kmalloc, then vmalloc. */
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	base = kmalloc_node(table_size, GFP_KERNEL, nid);
+	if (!base)
+		base = vmalloc_node(table_size, nid);
+
+	if (!base) {
+		printk(KERN_ERR "page cgroup allocation failure\n");
+		return -ENOMEM;
+	}
+	/* Entry i describes page frame pfn + i. */
+	for (index = 0; index < PAGES_PER_SECTION; index++) {
+		pc = base + index;
+		__init_page_cgroup(pc, pfn + index);
+	}
+	/* Store the table biased by -pfn so a lookup can add a page's pfn. */
+	section = __pfn_to_section(pfn);
+	section->page_cgroup = base - pfn;
+	total_usage += table_size;
+	return 0;
+}
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __free_page_cgroup(unsigned long pfn)
+{
+	struct mem_section *ms;
+	struct page_cgroup *base;
+
+	ms = __pfn_to_section(pfn);
+	if (!ms || !ms->page_cgroup)
+		return;		/* no section, or no table to free */
+	base = ms->page_cgroup + pfn;	/* undo the -pfn bias from init */
+	ms->page_cgroup = NULL;
+	if (is_vmalloc_addr(base))	/* table came from vmalloc_node() */
+		vfree(base);
+	else
+		kfree(base);
+}
+
+/* Set up page_cgroup for each present section in range; 0 or -ENOMEM. */
+int online_page_cgroup(unsigned long start_pfn, unsigned long nr_pages,
+			int nid)
+{
+	unsigned long start, end, pfn;
+	int fail = 0;
+
+	/* Round down to the section's first pfn (mask off the low bits). */
+	start = start_pfn & ~(PAGES_PER_SECTION - 1);
+	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
+		if (!pfn_present(pfn))
+			continue;
+		fail = init_section_page_cgroup(pfn);
+	}
+	if (!fail)
+		return 0;
+
+	/* rollback: drop the table of every section in the range */
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+		__free_page_cgroup(pfn);
+	return -ENOMEM;
+}
+
+/* Free the page_cgroup table of every section overlapping the range. */
+int offline_page_cgroup(unsigned long start_pfn,
+		unsigned long nr_pages, int nid)
+{
+	unsigned long start, end, pfn;
+
+	/* Round down to the section's first pfn (mask off the low bits). */
+	start = start_pfn & ~(PAGES_PER_SECTION - 1);
+	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+		__free_page_cgroup(pfn);
+	return 0;
+}
+
+/* Memory hotplug notifier: keep page_cgroup tables in step with sections. */
+static int page_cgroup_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	int ret = 0;
+	switch (action) {
+	case MEM_GOING_ONLINE:
+		ret = online_page_cgroup(mn->start_pfn,
+				   mn->nr_pages, mn->status_change_nid);
+		break;
+	case MEM_CANCEL_ONLINE:
+	case MEM_OFFLINE:
+		offline_page_cgroup(mn->start_pfn,
+				mn->nr_pages, mn->status_change_nid);
+		break;
+	case MEM_GOING_OFFLINE:
+	case MEM_ONLINE:
+	case MEM_CANCEL_OFFLINE:
+		break;	/* nothing to do for these events */
+	}
+	/* Encode errors only; success returns NOTIFY_OK so the chain goes on. */
+	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+}
+
+#endif
+
+void __init page_cgroup_init(void)
+{
+	unsigned long pfn;
+	int fail = 0;
+
+	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
+		if (!pfn_present(pfn))
+			continue;
+		fail = init_section_page_cgroup(pfn);
+	}
+	if (fail) {
+		printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
+		panic("Out of memory");
+	} else {
+		hotplug_memory_notifier(page_cgroup_callback, 0);
+	}
+	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
+	printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
+	" want\n");
+}
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+	return;
+}
+
+#endif
diff --git a/mm/readahead.c b/mm/readahead.c
index 6cbd9a72fde..bec83c15a78 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  */
 unsigned long max_sane_readahead(unsigned long nr)
 {
-	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
 		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 0383acfcb06..10993942d6c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,9 +53,47 @@
 
 #include <asm/tlbflush.h>
 
-struct kmem_cache *anon_vma_cachep;
+#include "internal.h"
 
-/* This must be called under the mmap_sem. */
+static struct kmem_cache *anon_vma_cachep;
+
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+}
+
+static inline void anon_vma_free(struct anon_vma *anon_vma)
+{
+	kmem_cache_free(anon_vma_cachep, anon_vma);
+}
+
+/**
+ * anon_vma_prepare - attach an anon_vma to a memory region
+ * @vma: the memory region in question
+ *
+ * This makes sure the memory mapping described by 'vma' has
+ * an 'anon_vma' attached to it, so that we can associate the
+ * anonymous pages mapped into it with that anon_vma.
+ *
+ * The common case will be that we already have one, but if
+ * not we either need to find an adjacent mapping that we
+ * can re-use the anon_vma from (very common when the only
+ * reason for splitting a vma has been mprotect()), or we
+ * allocate a new one.
+ *
+ * Anon-vma allocations are very subtle, because we may have
+ * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * and that may actually touch the spinlock even in the newly
+ * allocated vma (it depends on RCU to make sure that the
+ * anon_vma isn't actually destroyed).
+ *
+ * As a result, we need to do proper anon_vma locking even
+ * for the new allocation. At the same time, we do not want
+ * to do any locking for the common case of already having
+ * an anon_vma.
+ *
+ * This must be called with the mmap_sem held for reading.
+ */
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
-		struct anon_vma *allocated, *locked;
+		struct anon_vma *allocated;
 
 		anon_vma = find_mergeable_anon_vma(vma);
-		if (anon_vma) {
-			allocated = NULL;
-			locked = anon_vma;
-			spin_lock(&locked->lock);
-		} else {
+		allocated = NULL;
+		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
 				return -ENOMEM;
 			allocated = anon_vma;
-			locked = NULL;
 		}
+		spin_lock(&anon_vma->lock);
 
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
@@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		}
 		spin_unlock(&mm->page_table_lock);
 
-		if (locked)
-			spin_unlock(&locked->lock);
+		spin_unlock(&anon_vma->lock);
 		if (unlikely(allocated))
 			anon_vma_free(allocated);
 	}
@@ -157,7 +191,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
@@ -177,7 +211,7 @@ out:
 	return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
 	spin_unlock(&anon_vma->lock);
 	rcu_read_unlock();
@@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	return NULL;
 }
 
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA.  Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+	unsigned long address;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)		/* out of vma range */
+		return 0;
+	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+	if (!pte)			/* the page is not in this mm */
+		return 0;
+	pte_unmap_unlock(pte, ptl);
+
+	return 1;
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page,
 	if (!pte)
 		goto out;
 
+	/*
+	 * Don't want to elevate referenced for mlocked page that gets this far,
+	 * in order that it progresses to try_to_unmap and is moved to the
+	 * unevictable list.
+	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		referenced++;
 		*mapcount = 1;	/* break early from loop */
-	} else if (ptep_clear_flush_young_notify(vma, address, pte))
+		goto out_unmap;
+	}
+
+	if (ptep_clear_flush_young_notify(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
@@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page,
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
 
+out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 out:
@@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
-				  == (VM_LOCKED|VM_MAYSHARE)) {
-			referenced++;
-			break;
-		}
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
@@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-
-		mem_cgroup_uncharge_page(page);
+		if (PageAnon(page))
+			mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 		/*
@@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
 	 */
-	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young_notify(vma, address, pte)))) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
+	if (!migration) {
+		if (vma->vm_flags & VM_LOCKED) {
+			ret = SWAP_MLOCK;
+			goto out_unmap;
+		}
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
+  	}
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
@@ -802,12 +870,17 @@ out:
  * For very sparsely populated VMAs this is a little inefficient - chances are
  * there there won't be many ptes located within the scan cluster.  In this case
  * maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them.  If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
  */
 #define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
 #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
 
-static void try_to_unmap_cluster(unsigned long cursor,
-	unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+		struct vm_area_struct *vma, struct page *check_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	struct page *page;
 	unsigned long address;
 	unsigned long end;
+	int ret = SWAP_AGAIN;
+	int locked_vma = 0;
 
 	address = (vma->vm_start + cursor) & CLUSTER_MASK;
 	end = address + CLUSTER_SIZE;
@@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
-		return;
+		return ret;
 
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		return;
+		return ret;
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		return;
+		return ret;
+
+	/*
+	 * MLOCK_PAGES => feature is configured.
+	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * keep the sem while scanning the cluster for mlocking pages.
+	 */
+	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		locked_vma = (vma->vm_flags & VM_LOCKED);
+		if (!locked_vma)
+			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
+	}
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
@@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		page = vm_normal_page(vma, address, *pte);
 		BUG_ON(!page || PageAnon(page));
 
+		if (locked_vma) {
+			mlock_vma_page(page);   /* no-op if already mlocked */
+			if (page == check_page)
+				ret = SWAP_MLOCK;
+			continue;	/* don't unmap */
+		}
+
 		if (ptep_clear_flush_young_notify(vma, address, pte))
 			continue;
 
@@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
+	if (locked_vma)
+		up_read(&vma->vm_mm->mmap_sem);
+	return ret;
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+	int mlocked = 0;
+
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			mlocked++;	/* really mlocked the page */
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return mlocked;
+}
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
+	unsigned int mlocked = 0;
 	int ret = SWAP_AGAIN;
 
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
+
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			break;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!((vma->vm_flags & VM_LOCKED) &&
+			      page_mapped_in_vma(page, vma)))
+				continue;  /* must visit all unlocked vmas */
+			ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				break;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;	/* stop if actually mlocked page */
+		}
 	}
 
 	page_unlock_anon_vma(anon_vma);
+
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
+
 	return ret;
 }
 
 /**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration)
 	unsigned long max_nl_cursor = 0;
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
+	unsigned int mlocked = 0;
+
+	if (MLOCK_PAGES && unlikely(unlock))
+		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma, migration);
-		if (ret == SWAP_FAIL || !page_mapped(page))
-			goto out;
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;
+		} else {
+			ret = try_to_unmap_one(page, vma, migration);
+			if (ret == SWAP_FAIL || !page_mapped(page))
+				goto out;
+		}
+		if (ret == SWAP_MLOCK) {
+			mlocked = try_to_mlock_page(page, vma);
+			if (mlocked)
+				break;  /* stop if actually mlocked page */
+		}
 	}
 
+	if (mlocked)
+		goto out;
+
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if ((vma->vm_flags & VM_LOCKED) && !migration)
+		if (MLOCK_PAGES && unlikely(unlock)) {
+			if (!(vma->vm_flags & VM_LOCKED))
+				continue;	/* must visit all vmas */
+			ret = SWAP_MLOCK;	/* leave mlocked == 0 */
+			goto out;		/* no need to look further */
+		}
+		if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
@@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 			max_nl_size = cursor;
 	}
 
-	if (max_nl_size == 0) {	/* any nonlinears locked or reserved */
+	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
 		ret = SWAP_FAIL;
 		goto out;
 	}
@@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if ((vma->vm_flags & VM_LOCKED) && !migration)
+			if (!MLOCK_PAGES && !migration &&
+			    (vma->vm_flags & VM_LOCKED))
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				try_to_unmap_cluster(cursor, &mapcount, vma);
+				ret = try_to_unmap_cluster(cursor, &mapcount,
+								vma, page);
+				if (ret == SWAP_MLOCK)
+					mlocked = 2;	/* to return below */
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
+	if (mlocked)
+		ret = SWAP_MLOCK;	/* actually mlocked the page */
+	else if (ret == SWAP_MLOCK)
+		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
 	return ret;
 }
 
@@ -1002,6 +1192,7 @@ out:
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a mapping, try again later
  * SWAP_FAIL	- the page is unswappable
+ * SWAP_MLOCK	- page is mlocked.
  */
 int try_to_unmap(struct page *page, int migration)
 {
@@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration)
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page, migration);
+		ret = try_to_unmap_anon(page, 0, migration);
 	else
-		ret = try_to_unmap_file(page, migration);
-
-	if (!page_mapped(page))
+		ret = try_to_unmap_file(page, 0, migration);
+	if (ret != SWAP_MLOCK && !page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code.  Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS	- no vma's holding page mlocked.
+ * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK	- page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+	if (PageAnon(page))
+		return try_to_unmap_anon(page, 1, 0);
+	else
+		return try_to_unmap_file(page, 1, 0);
+}
+#endif
diff --git a/mm/shmem.c b/mm/shmem.c
index d87958a5f03..d38d7e61fcd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
 	.ra_pages	= 0,	/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= default_unplug_io_fn,
 };
 
@@ -1367,6 +1367,7 @@ repeat:
 				error = -ENOMEM;
 				goto failed;
 			}
+			SetPageSwapBacked(filepage);
 
 			/* Precharge page while we can wait, compensate after */
 			error = mem_cgroup_cache_charge(filepage, current->mm,
@@ -1476,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 		if (!user_shm_lock(inode->i_size, user))
 			goto out_nomem;
 		info->flags |= VM_LOCKED;
+		mapping_set_unevictable(file->f_mapping);
 	}
 	if (!lock && (info->flags & VM_LOCKED) && user) {
 		user_shm_unlock(inode->i_size, user);
 		info->flags &= ~VM_LOCKED;
+		mapping_clear_unevictable(file->f_mapping);
+		scan_mapping_unevictable_pages(file->f_mapping);
 	}
 	retval = 0;
+
 out_nomem:
 	spin_unlock(&info->lock);
 	return retval;
diff --git a/mm/swap.c b/mm/swap.c
index 9e0cb311807..2152e48a7b8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,11 +31,12 @@
 #include <linux/backing-dev.h>
 #include <linux/memcontrol.h>
 
+#include "internal.h"
+
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
+static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 
 /*
@@ -116,8 +117,9 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock(&zone->lru_lock);
 		}
-		if (PageLRU(page) && !PageActive(page)) {
-			list_move_tail(&page->lru, &zone->inactive_list);
+		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+			int lru = page_is_file_cache(page);
+			list_move_tail(&page->lru, &zone->lru[lru].list);
 			pgmoved++;
 		}
 	}
@@ -136,7 +138,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 void  rotate_reclaimable_page(struct page *page)
 {
 	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
-	    PageLRU(page)) {
+	    !PageUnevictable(page) && PageLRU(page)) {
 		struct pagevec *pvec;
 		unsigned long flags;
 
@@ -157,12 +159,19 @@ void activate_page(struct page *page)
 	struct zone *zone = page_zone(page);
 
 	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page)) {
-		del_page_from_inactive_list(zone, page);
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+		int file = page_is_file_cache(page);
+		int lru = LRU_BASE + file;
+		del_page_from_lru_list(zone, page, lru);
+
 		SetPageActive(page);
-		add_page_to_active_list(zone, page);
+		lru += LRU_ACTIVE;
+		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
-		mem_cgroup_move_lists(page, true);
+		mem_cgroup_move_lists(page, lru);
+
+		zone->recent_rotated[!!file]++;
+		zone->recent_scanned[!!file]++;
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
@@ -176,7 +185,8 @@ void activate_page(struct page *page)
  */
 void mark_page_accessed(struct page *page)
 {
-	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
+	if (!PageActive(page) && !PageUnevictable(page) &&
+			PageReferenced(page) && PageLRU(page)) {
 		activate_page(page);
 		ClearPageReferenced(page);
 	} else if (!PageReferenced(page)) {
@@ -186,28 +196,73 @@ void mark_page_accessed(struct page *page)
 
 EXPORT_SYMBOL(mark_page_accessed);
 
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-void lru_cache_add(struct page *page)
+void __lru_cache_add(struct page *page, enum lru_list lru)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
+	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		__pagevec_lru_add(pvec);
+		____pagevec_lru_add(pvec, lru);
 	put_cpu_var(lru_add_pvecs);
 }
 
-void lru_cache_add_active(struct page *page)
+/**
+ * lru_cache_add_lru - add a page to a page list
+ * @page: the page to be added to the LRU.
+ * @lru: the LRU list to which the page is added.
+ */
+void lru_cache_add_lru(struct page *page, enum lru_list lru)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
+	if (PageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
+		ClearPageActive(page);
+	} else if (PageUnevictable(page)) {
+		VM_BUG_ON(PageActive(page));
+		ClearPageUnevictable(page);
+	}
 
-	page_cache_get(page);
-	if (!pagevec_add(pvec, page))
-		__pagevec_lru_add_active(pvec);
-	put_cpu_var(lru_add_active_pvecs);
+	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
+	__lru_cache_add(page, lru);
+}
+
+/**
+ * add_page_to_unevictable_list - add a page to the unevictable list
+ * @page:  the page to be added to the unevictable list
+ *
+ * Add page directly to its zone's unevictable list.  To avoid races with
+ * tasks that might be making the page evictable, through eg. munlock,
+ * munmap or exit, while it's not on the lru, we want to add the page
+ * while it's locked or otherwise "invisible" to other tasks.  This is
+ * difficult to do when using the pagevec cache, so bypass that.
+ */
+void add_page_to_unevictable_list(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	SetPageUnevictable(page);
+	SetPageLRU(page);
+	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+	spin_unlock_irq(&zone->lru_lock);
+}
+
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page:  the page to be added to LRU
+ * @vma:   vma in which page is mapped for determining reclaimability
+ *
+ * place @page on active or unevictable LRU list, depending on
+ * page_evictable().  Note that if the page is not evictable,
+ * it goes directly back onto its zone's unevictable list.  It does
+ * NOT use a per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+					struct vm_area_struct *vma)
+{
+	if (page_evictable(page, vma))
+		lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
+	else
+		add_page_to_unevictable_list(page);
 }
 
 /*
@@ -217,15 +272,15 @@ void lru_cache_add_active(struct page *page)
  */
 static void drain_cpu_pagevecs(int cpu)
 {
+	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
 	struct pagevec *pvec;
+	int lru;
 
-	pvec = &per_cpu(lru_add_pvecs, cpu);
-	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
-
-	pvec = &per_cpu(lru_add_active_pvecs, cpu);
-	if (pagevec_count(pvec))
-		__pagevec_lru_add_active(pvec);
+	for_each_lru(lru) {
+		pvec = &pvecs[lru - LRU_BASE];
+		if (pagevec_count(pvec))
+			____pagevec_lru_add(pvec, lru);
+	}
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
 	if (pagevec_count(pvec)) {
@@ -244,7 +299,7 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
-#ifdef CONFIG_NUMA
+#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
 static void lru_add_drain_per_cpu(struct work_struct *dummy)
 {
 	lru_add_drain();
@@ -308,6 +363,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (PageLRU(page)) {
 			struct zone *pagezone = page_zone(page);
+
 			if (pagezone != zone) {
 				if (zone)
 					spin_unlock_irqrestore(&zone->lru_lock,
@@ -380,10 +436,11 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void __pagevec_lru_add(struct pagevec *pvec)
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	int i;
 	struct zone *zone = NULL;
+	VM_BUG_ON(is_unevictable_lru(lru));
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -395,9 +452,13 @@ void __pagevec_lru_add(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
+		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(PageUnevictable(page));
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		add_page_to_inactive_list(zone, page);
+		if (is_active_lru(lru))
+			SetPageActive(page);
+		add_page_to_lru_list(zone, page, lru);
 	}
 	if (zone)
 		spin_unlock_irq(&zone->lru_lock);
@@ -405,48 +466,45 @@ void __pagevec_lru_add(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
-EXPORT_SYMBOL(__pagevec_lru_add);
+EXPORT_SYMBOL(____pagevec_lru_add);
 
-void __pagevec_lru_add_active(struct pagevec *pvec)
+/*
+ * Try to drop buffers from the pages in a pagevec
+ */
+void pagevec_strip(struct pagevec *pvec)
 {
 	int i;
-	struct zone *zone = NULL;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
-		struct zone *pagezone = page_zone(page);
 
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
-			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+		if (PagePrivate(page) && trylock_page(page)) {
+			if (PagePrivate(page))
+				try_to_release_page(page, 0);
+			unlock_page(page);
 		}
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(PageActive(page));
-		SetPageActive(page);
-		add_page_to_active_list(zone, page);
 	}
-	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
-	release_pages(pvec->pages, pvec->nr, pvec->cold);
-	pagevec_reinit(pvec);
 }
 
-/*
- * Try to drop buffers from the pages in a pagevec
+/**
+ * pagevec_swap_free - try to free swap space from the pages in a pagevec
+ * @pvec: pagevec with swapcache pages to free the swap space of
+ *
+ * The caller needs to hold an extra reference to each page and
+ * not hold the page lock on the pages.  This function uses a
+ * trylock on the page lock so it may not always free the swap
+ * space associated with a page.
  */
-void pagevec_strip(struct pagevec *pvec)
+void pagevec_swap_free(struct pagevec *pvec)
 {
 	int i;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
-		if (PagePrivate(page) && trylock_page(page)) {
-			if (PagePrivate(page))
-				try_to_release_page(page, 0);
+		if (PageSwapCache(page) && trylock_page(page)) {
+			if (PageSwapCache(page))
+				remove_exclusive_swap_page_ref(page);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 797c3831cbe..3353c9029ce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
-	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
@@ -75,6 +75,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageSwapCache(page));
 	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageSwapBacked(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 		page_cache_get(page);
@@ -302,17 +303,19 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		set_page_locked(new_page);
+		__set_page_locked(new_page);
+		SetPageSwapBacked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (likely(!err)) {
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active(new_page);
+			lru_cache_add_anon(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-		clear_page_locked(new_page);
+		ClearPageSwapBacked(new_page);
+		__clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e330f2998f..90cb67a5417 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -344,7 +344,7 @@ int can_share_swap_page(struct page *page)
  * Work out if there are any other processes sharing this
  * swap cache page. Free it if you can. Return success.
  */
-int remove_exclusive_swap_page(struct page *page)
+static int remove_exclusive_swap_page_count(struct page *page, int count)
 {
 	int retval;
 	struct swap_info_struct * p;
@@ -357,7 +357,7 @@ int remove_exclusive_swap_page(struct page *page)
 		return 0;
 	if (PageWriteback(page))
 		return 0;
-	if (page_count(page) != 2) /* 2: us + cache */
+	if (page_count(page) != count) /* us + cache + ptes */
 		return 0;
 
 	entry.val = page_private(page);
@@ -370,7 +370,7 @@ int remove_exclusive_swap_page(struct page *page)
 	if (p->swap_map[swp_offset(entry)] == 1) {
 		/* Recheck the page count with the swapcache lock held.. */
 		spin_lock_irq(&swapper_space.tree_lock);
-		if ((page_count(page) == 2) && !PageWriteback(page)) {
+		if ((page_count(page) == count) && !PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
@@ -388,6 +388,25 @@ int remove_exclusive_swap_page(struct page *page)
 }
 
 /*
+ * Most of the time the page should have two references: one for the
+ * process and one for the swap cache.
+ */
+int remove_exclusive_swap_page(struct page *page)
+{
+	return remove_exclusive_swap_page_count(page, 2);
+}
+
+/*
+ * The pageout code holds an extra reference to the page.  The reference
+ * count to test for is therefore 2 for a page that is only in the swap
+ * cache, plus 1 for each process that maps the page.
+ */
+int remove_exclusive_swap_page_ref(struct page *page)
+{
+	return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+}
+
+/*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
  */
@@ -403,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
-			if (page && unlikely(!trylock_page(page))) {
+			if (page && !trylock_page(page)) {
 				page_cache_release(page);
 				page = NULL;
 			}
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index 8d7a27a6335..3e67d575ee6 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -95,6 +95,7 @@ put_dentry:
 put_memory:
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
diff --git a/mm/truncate.c b/mm/truncate.c
index e83e4b114ef..1229211104f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -18,6 +18,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
+#include "internal.h"
 
 
 /**
@@ -103,6 +104,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
+	clear_page_mlock(page);
 	remove_from_page_cache(page);
 	ClearPageMappedToDisk(page);
 	page_cache_release(page);	/* pagecache ref */
@@ -127,6 +129,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 	if (PagePrivate(page) && !try_to_release_page(page, 0))
 		return 0;
 
+	clear_page_mlock(page);
 	ret = remove_mapping(mapping, page);
 
 	return ret;
@@ -352,6 +355,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (PageDirty(page))
 		goto failed;
 
+	clear_page_mlock(page);
 	BUG_ON(PagePrivate(page));
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc5..65ae576030d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -8,6 +8,7 @@
  *  Numa awareness, Christoph Lameter, SGI, June 2005
  */
 
+#include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
@@ -16,18 +17,18 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugobjects.h>
-#include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
 
+#include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
 
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
-			    int node, void *caller);
+/*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 {
@@ -40,8 +41,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
-						unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -55,8 +55,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
 	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
-						unsigned long end)
+static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -70,12 +69,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
 	} while (pud++, addr = next, addr != end);
 }
 
-void unmap_kernel_range(unsigned long addr, unsigned long size)
+static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start = addr;
-	unsigned long end = addr + size;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
@@ -86,35 +83,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 			continue;
 		vunmap_pud_range(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
-	flush_tlb_kernel_range(start, end);
-}
-
-static void unmap_vm_area(struct vm_struct *area)
-{
-	unmap_kernel_range((unsigned long)area->addr, area->size);
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pte_t *pte;
 
+	/*
+	 * nr is a running index into the array which helps higher level
+	 * callers keep track of where we're up to.
+	 */
+
 	pte = pte_alloc_kernel(pmd, addr);
 	if (!pte)
 		return -ENOMEM;
 	do {
-		struct page *page = **pages;
-		WARN_ON(!pte_none(*pte));
-		if (!page)
+		struct page *page = pages[*nr];
+
+		if (WARN_ON(!pte_none(*pte)))
+			return -EBUSY;
+		if (WARN_ON(!page))
 			return -ENOMEM;
 		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
-		(*pages)++;
+		(*nr)++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	return 0;
 }
 
-static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -124,14 +122,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages))
+		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
-			unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -141,57 +139,78 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages))
+		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+/*
+ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
+ * will have pfns corresponding to the "pages" array, ie. the pte at
+ * addr+N*PAGE_SIZE shall point to the pfn corresponding to pages[N].
+ * Returns the number of pages mapped, or a negative errno on failure.
+ */
+static int vmap_page_range(unsigned long addr, unsigned long end,
+				pgprot_t prot, struct page **pages)
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long addr = (unsigned long) area->addr;
-	unsigned long end = addr + area->size - PAGE_SIZE;
-	int err;
+	unsigned long start = addr;	/* addr is advanced by the walk below */
+	int err = 0, nr = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_pud_range(pgd, addr, next, prot, pages);
+		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	flush_cache_vmap((unsigned long) area->addr, end);
-	return err;
+	flush_cache_vmap(start, end);
+
+	if (unlikely(err))
+		return err;
+	return nr;
+}
+
+static inline int is_vmalloc_or_module_addr(const void *x)
+{
+	/*
+	 * x86-64 and sparc64 put modules in a special place,
+	 * and fall back on vmalloc() if that fails. Others
+	 * just put it in the vmalloc space.
+	 */
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+	unsigned long addr = (unsigned long)x;
+	if (addr >= MODULES_VADDR && addr < MODULES_END)
+		return 1;
+#endif
+	return is_vmalloc_addr(x);
 }
-EXPORT_SYMBOL_GPL(map_vm_area);
 
 /*
- * Map a vmalloc()-space virtual address to the physical page.
+ * Walk a vmap address to the struct page it maps.
  */
 struct page *vmalloc_to_page(const void *vmalloc_addr)
 {
 	unsigned long addr = (unsigned long) vmalloc_addr;
 	struct page *page = NULL;
 	pgd_t *pgd = pgd_offset_k(addr);
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
 
 	/*
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
 	 * architectures that do not vmalloc module space
 	 */
-	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
-			!is_module_address(addr));
+	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
+		pud_t *pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
+			pmd_t *pmd = pmd_offset(pud, addr);
 			if (!pmd_none(*pmd)) {
+				pte_t *ptep, pte;
+
 				ptep = pte_offset_map(pmd, addr);
 				pte = *ptep;
 				if (pte_present(pte))
@@ -213,13 +232,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 }
 EXPORT_SYMBOL(vmalloc_to_pfn);
 
-static struct vm_struct *
-__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
-		unsigned long end, int node, gfp_t gfp_mask, void *caller)
+
+/*** Global kva allocator ***/
+
+#define VM_LAZY_FREE	0x01
+#define VM_LAZY_FREEING	0x02
+#define VM_VM_AREA	0x04
+
+struct vmap_area {
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long flags;
+	struct rb_node rb_node;		/* address sorted rbtree */
+	struct list_head list;		/* address sorted list */
+	struct list_head purge_list;	/* "lazy purge" list */
+	void *private;
+	struct rcu_head rcu_head;
+};
+
+static DEFINE_SPINLOCK(vmap_area_lock);
+static struct rb_root vmap_area_root = RB_ROOT;
+static LIST_HEAD(vmap_area_list);
+
+static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
-	struct vm_struct **p, *tmp, *area;
-	unsigned long align = 1;
+	struct rb_node *n = vmap_area_root.rb_node;
+
+	while (n) {
+		struct vmap_area *va;
+
+		va = rb_entry(n, struct vmap_area, rb_node);
+		if (addr < va->va_start)
+			n = n->rb_left;
+		else if (addr > va->va_start)
+			n = n->rb_right;
+		else
+			return va;
+	}
+
+	return NULL;
+}
+
+static void __insert_vmap_area(struct vmap_area *va)
+{
+	struct rb_node **p = &vmap_area_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node *tmp;
+
+	while (*p) {
+		struct vmap_area *tmp_va;	/* distinct name: no shadowing of tmp */
+
+		parent = *p;
+		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
+		if (va->va_start < tmp_va->va_end)
+			p = &(*p)->rb_left;
+		else if (va->va_end > tmp_va->va_start)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&va->rb_node, parent, p);
+	rb_insert_color(&va->rb_node, &vmap_area_root);
+
+	/* address-sort this list so it is usable like the vmlist */
+	tmp = rb_prev(&va->rb_node);
+	if (tmp) {
+		struct vmap_area *prev;
+		prev = rb_entry(tmp, struct vmap_area, rb_node);
+		list_add_rcu(&va->list, &prev->list);
+	} else
+		list_add_rcu(&va->list, &vmap_area_list);
+}
+
+static void purge_vmap_area_lazy(void);
+
+/*
+ * Allocate a region of KVA of the specified size and alignment, within the
+ * vstart and vend. Returns the new vmap_area, or ERR_PTR(-ENOMEM/-EBUSY).
+ */
+static struct vmap_area *alloc_vmap_area(unsigned long size,
+				unsigned long align,
+				unsigned long vstart, unsigned long vend,
+				int node, gfp_t gfp_mask)
+{
+	struct vmap_area *va;
+	struct rb_node *n;
 	unsigned long addr;
+	int purged = 0;
+
+	BUG_ON(size & ~PAGE_MASK);
+
+	va = kmalloc_node(sizeof(struct vmap_area),
+			gfp_mask & GFP_RECLAIM_MASK, node);
+	if (unlikely(!va))
+		return ERR_PTR(-ENOMEM);
+
+retry:
+	addr = ALIGN(vstart, align);	/* restart the search after a purge */
+	spin_lock(&vmap_area_lock);
+	/* XXX: could have a last_hole cache */
+	n = vmap_area_root.rb_node;
+	if (n) {
+		struct vmap_area *first = NULL;
+
+		do {
+			struct vmap_area *tmp;
+			tmp = rb_entry(n, struct vmap_area, rb_node);
+			if (tmp->va_end >= addr) {
+				if (!first && tmp->va_start < addr + size)
+					first = tmp;
+				n = n->rb_left;
+			} else {
+				first = tmp;
+				n = n->rb_right;
+			}
+		} while (n);
+
+		if (!first)
+			goto found;
+
+		if (first->va_end < addr) {
+			n = rb_next(&first->rb_node);
+			if (n)
+				first = rb_entry(n, struct vmap_area, rb_node);
+			else
+				goto found;
+		}
+
+		while (addr + size >= first->va_start && addr + size <= vend) {
+			addr = ALIGN(first->va_end + PAGE_SIZE, align);
+
+			n = rb_next(&first->rb_node);
+			if (n)
+				first = rb_entry(n, struct vmap_area, rb_node);
+			else
+				goto found;
+		}
+	}
+found:
+	if (addr + size > vend) {
+		spin_unlock(&vmap_area_lock);
+		if (!purged) {
+			purge_vmap_area_lazy();
+			purged = 1;
+			goto retry;
+		}
+		if (printk_ratelimit())
+			printk(KERN_WARNING "vmap allocation failed: "
+				 "use vmalloc=<size> to increase size.\n");
+		kfree(va);	/* never inserted, so still owned here */
+		return ERR_PTR(-EBUSY);
+	}
+
+	BUG_ON(addr & (align-1));
+
+	va->va_start = addr;
+	va->va_end = addr + size;
+	va->flags = 0;
+	__insert_vmap_area(va);
+	spin_unlock(&vmap_area_lock);
+
+	return va;
+}
+
+static void rcu_free_va(struct rcu_head *head)
+{
+	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
+
+	kfree(va);
+}
+
+static void __free_vmap_area(struct vmap_area *va)
+{
+	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+	rb_erase(&va->rb_node, &vmap_area_root);
+	RB_CLEAR_NODE(&va->rb_node);
+	list_del_rcu(&va->list);
+
+	call_rcu(&va->rcu_head, rcu_free_va);
+}
+
+/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+	spin_lock(&vmap_area_lock);
+	__free_vmap_area(va);
+	spin_unlock(&vmap_area_lock);
+}
+
+/*
+ * Clear the pagetable entries of a given vmap_area
+ */
+static void unmap_vmap_area(struct vmap_area *va)
+{
+	vunmap_page_range(va->va_start, va->va_end);
+}
+
+/*
+ * lazy_max_pages is the maximum amount of virtual address space we gather up
+ * before attempting to purge with a TLB flush.
+ *
+ * There is a tradeoff here: a larger number will cover more kernel page tables
+ * and take slightly longer to purge, but it will linearly reduce the number of
+ * global TLB flushes that must be performed. It would seem natural to scale
+ * this number up linearly with the number of CPUs (because vmapping activity
+ * could also scale linearly with the number of CPUs), however it is likely
+ * that in practice, workloads might be constrained in other ways that mean
+ * vmap activity will not scale linearly with CPUs. Also, I want to be
+ * conservative and not introduce a big latency on huge systems, so go with
+ * a less aggressive log scale. It will still be an improvement over the old
+ * code, and it will be simple to change the scale factor if we find that it
+ * becomes a problem on bigger systems.
+ */
+static unsigned long lazy_max_pages(void)
+{
+	unsigned int log;
+
+	log = fls(num_online_cpus());
+
+	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+}
+
+static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
+/*
+ * Purges all lazily-freed vmap areas.
+ *
+ * If sync is 0 then don't purge if there is already a purge in progress.
+ * If force_flush is 1, then flush kernel TLBs between *start and *end even
+ * if we found no lazy vmap areas to unmap (callers can use this to optimise
+ * their own TLB flushing).
+ * Returns with *start = min(*start, lowest purged address)
+ *              *end = max(*end, highest purged address)
+ */
+static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
+					int sync, int force_flush)
+{
+	static DEFINE_SPINLOCK(purge_lock);
+	LIST_HEAD(valist);
+	struct vmap_area *va, *n_va;
+	int nr = 0;
+
+	/*
+	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
+	 * should not expect such behaviour. This just simplifies locking for
+	 * the case that isn't actually used at the moment anyway.
+	 */
+	if (!sync && !force_flush) {
+		if (!spin_trylock(&purge_lock))
+			return;
+	} else
+		spin_lock(&purge_lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(va, &vmap_area_list, list) {
+		if (va->flags & VM_LAZY_FREE) {
+			if (va->va_start < *start)
+				*start = va->va_start;
+			if (va->va_end > *end)
+				*end = va->va_end;
+			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+			unmap_vmap_area(va);
+			list_add_tail(&va->purge_list, &valist);
+			va->flags |= VM_LAZY_FREEING;
+			va->flags &= ~VM_LAZY_FREE;
+		}
+	}
+	rcu_read_unlock();
+
+	if (nr) {
+		BUG_ON(nr > atomic_read(&vmap_lazy_nr));
+		atomic_sub(nr, &vmap_lazy_nr);
+	}
+
+	if (nr || force_flush)
+		flush_tlb_kernel_range(*start, *end);
+
+	if (nr) {	/* _safe walk: __free_vmap_area() queues va for kfree */
+		spin_lock(&vmap_area_lock);
+		list_for_each_entry_safe(va, n_va, &valist, purge_list)
+			__free_vmap_area(va);
+		spin_unlock(&vmap_area_lock);
+	}
+	spin_unlock(&purge_lock);
+}
+
+/*
+ * Kick off a purge of the outstanding lazy areas.
+ */
+static void purge_vmap_area_lazy(void)
+{
+	unsigned long start = ULONG_MAX, end = 0;
+
+	__purge_vmap_area_lazy(&start, &end, 0, 0);
+}
+
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+	va->flags |= VM_LAZY_FREE;
+	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
+	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
+		purge_vmap_area_lazy();
+}
+
+static struct vmap_area *find_vmap_area(unsigned long addr)
+{
+	struct vmap_area *va;
+
+	spin_lock(&vmap_area_lock);
+	va = __find_vmap_area(addr);
+	spin_unlock(&vmap_area_lock);
+
+	return va;
+}
+
+static void free_unmap_vmap_area_addr(unsigned long addr)
+{
+	struct vmap_area *va;
+
+	va = find_vmap_area(addr);
+	BUG_ON(!va);
+	free_unmap_vmap_area(va);
+}
+
+
+/*** Per cpu kva allocator ***/
+
+/*
+ * vmap space is limited especially on 32 bit architectures. Ensure there is
+ * room for at least 16 percpu vmap blocks per CPU.
+ */
+/*
+ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
+ * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
+ * instead (we just need a rough idea)
+ */
+#if BITS_PER_LONG == 32
+#define VMALLOC_SPACE		(128UL*1024*1024)
+#else
+#define VMALLOC_SPACE		(128UL*1024*1024*1024)
+#endif
+
+#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
+#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
+#define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
+#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
+#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
+#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
+#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
+					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
+						VMALLOC_PAGES / NR_CPUS / 16))
+
+#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
+
+struct vmap_block_queue {
+	spinlock_t lock;
+	struct list_head free;
+	struct list_head dirty;
+	unsigned int nr_dirty;
+};
+
+struct vmap_block {
+	spinlock_t lock;
+	struct vmap_area *va;
+	struct vmap_block_queue *vbq;
+	unsigned long free, dirty;
+	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+	union {
+		struct {
+			struct list_head free_list;
+			struct list_head dirty_list;
+		};
+		struct rcu_head rcu_head;
+	};
+};
+
+/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
+
+/*
+ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
+ * in the free path. Could get rid of this if we change the API to return a
+ * "cookie" from alloc, to be passed to free. But no big deal yet.
+ */
+static DEFINE_SPINLOCK(vmap_block_tree_lock);
+static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
+
+/*
+ * We should probably have a fallback mechanism to allocate virtual memory
+ * out of partially filled vmap blocks. However vmap block sizing should be
+ * fairly reasonable according to the vmalloc size, so it shouldn't be a
+ * big problem.
+ */
+
+static unsigned long addr_to_vb_idx(unsigned long addr)
+{
+	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
+	addr /= VMAP_BLOCK_SIZE;
+	return addr;
+}
+
+static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
+{
+	struct vmap_block_queue *vbq;
+	struct vmap_block *vb;
+	struct vmap_area *va;
+	unsigned long vb_idx;
+	int node, err;
+
+	node = numa_node_id();
+
+	vb = kmalloc_node(sizeof(struct vmap_block),
+			gfp_mask & GFP_RECLAIM_MASK, node);
+	if (unlikely(!vb))
+		return ERR_PTR(-ENOMEM);
+
+	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
+					VMALLOC_START, VMALLOC_END,
+					node, gfp_mask);
+	if (unlikely(IS_ERR(va))) {
+		kfree(vb);
+		return ERR_PTR(PTR_ERR(va));
+	}
+
+	err = radix_tree_preload(gfp_mask);
+	if (unlikely(err)) {
+		kfree(vb);
+		free_vmap_area(va);
+		return ERR_PTR(err);
+	}
+
+	spin_lock_init(&vb->lock);
+	vb->va = va;
+	vb->free = VMAP_BBMAP_BITS;
+	vb->dirty = 0;
+	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
+	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
+	INIT_LIST_HEAD(&vb->free_list);
+	INIT_LIST_HEAD(&vb->dirty_list);
+
+	vb_idx = addr_to_vb_idx(va->va_start);
+	spin_lock(&vmap_block_tree_lock);
+	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
+	spin_unlock(&vmap_block_tree_lock);
+	BUG_ON(err);
+	radix_tree_preload_end();
+
+	vbq = &get_cpu_var(vmap_block_queue);
+	vb->vbq = vbq;
+	spin_lock(&vbq->lock);
+	list_add(&vb->free_list, &vbq->free);
+	spin_unlock(&vbq->lock);
+	put_cpu_var(vmap_block_queue);	/* pairs with get_cpu_var() above */
+
+	return vb;
+}
+
+static void rcu_free_vb(struct rcu_head *head)
+{
+	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
+
+	kfree(vb);
+}
+
+static void free_vmap_block(struct vmap_block *vb)
+{
+	struct vmap_block *tmp;
+	unsigned long vb_idx;
+
+	spin_lock(&vb->vbq->lock);
+	if (!list_empty(&vb->free_list))
+		list_del(&vb->free_list);
+	if (!list_empty(&vb->dirty_list))
+		list_del(&vb->dirty_list);
+	spin_unlock(&vb->vbq->lock);
+
+	vb_idx = addr_to_vb_idx(vb->va->va_start);
+	spin_lock(&vmap_block_tree_lock);
+	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+	spin_unlock(&vmap_block_tree_lock);
+	BUG_ON(tmp != vb);
+
+	free_unmap_vmap_area(vb->va);
+	call_rcu(&vb->rcu_head, rcu_free_vb);
+}
+
+static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+{
+	struct vmap_block_queue *vbq;
+	struct vmap_block *vb;
+	unsigned long addr = 0;
+	unsigned int order;
+
+	BUG_ON(size & ~PAGE_MASK);
+	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+	order = get_order(size);
+
+again:
+	rcu_read_lock();
+	vbq = &get_cpu_var(vmap_block_queue);
+	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+		int i;
+
+		spin_lock(&vb->lock);
+		i = bitmap_find_free_region(vb->alloc_map,
+						VMAP_BBMAP_BITS, order);
+
+		if (i >= 0) {
+			addr = vb->va->va_start + (i << PAGE_SHIFT);
+			BUG_ON(addr_to_vb_idx(addr) !=
+					addr_to_vb_idx(vb->va->va_start));
+			vb->free -= 1UL << order;
+			if (vb->free == 0) {
+				spin_lock(&vbq->lock);
+				list_del_init(&vb->free_list);
+				spin_unlock(&vbq->lock);
+			}
+			spin_unlock(&vb->lock);
+			break;
+		}
+		spin_unlock(&vb->lock);
+	}
+	put_cpu_var(vmap_block_queue);	/* pairs with get_cpu_var() above */
+	rcu_read_unlock();
+
+	if (!addr) {
+		vb = new_vmap_block(gfp_mask);
+		if (IS_ERR(vb))
+			return vb;
+		goto again;
+	}
+
+	return (void *)addr;
+}
+
+static void vb_free(const void *addr, unsigned long size)
+{
+	unsigned long offset;
+	unsigned long vb_idx;
+	unsigned int order;
+	struct vmap_block *vb;
+
+	BUG_ON(size & ~PAGE_MASK);
+	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+	order = get_order(size);
+
+	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
+
+	vb_idx = addr_to_vb_idx((unsigned long)addr);
+	rcu_read_lock();
+	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
+	rcu_read_unlock();
+	BUG_ON(!vb);
+
+	spin_lock(&vb->lock);
+	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+	if (!vb->dirty) {
+		spin_lock(&vb->vbq->lock);
+		list_add(&vb->dirty_list, &vb->vbq->dirty);
+		spin_unlock(&vb->vbq->lock);
+	}
+	vb->dirty += 1UL << order;
+	if (vb->dirty == VMAP_BBMAP_BITS) {
+		BUG_ON(vb->free || !list_empty(&vb->free_list));
+		spin_unlock(&vb->lock);
+		free_vmap_block(vb);
+	} else
+		spin_unlock(&vb->lock);
+}
+
+/**
+ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
+ *
+ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
+ * to amortize TLB flushing overheads. What this means is that any page you
+ * have now, may, in a former life, have been mapped into kernel virtual
+ * address by the vmap layer and so there might be some CPUs with TLB entries
+ * still referencing that page (additional to the regular 1:1 kernel mapping).
+ *
+ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
+ * be sure that none of the pages we have control over will have any aliases
+ * from the vmap layer.
+ */
+void vm_unmap_aliases(void)
+{
+	unsigned long start = ULONG_MAX, end = 0;
+	int cpu;
+	int flush = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+		struct vmap_block *vb;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+			int i;
+
+			spin_lock(&vb->lock);
+			i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
+			while (i < VMAP_BBMAP_BITS) {
+				unsigned long s, e;
+				int j;
+				j = find_next_zero_bit(vb->dirty_map,
+					VMAP_BBMAP_BITS, i);
+
+				s = vb->va->va_start + (i << PAGE_SHIFT);
+				e = vb->va->va_start + (j << PAGE_SHIFT);
+				vunmap_page_range(s, e);
+				flush = 1;
+
+				if (s < start)
+					start = s;
+				if (e > end)
+					end = e;
+
+				i = j;
+				i = find_next_bit(vb->dirty_map,
+							VMAP_BBMAP_BITS, i);
+			}
+			spin_unlock(&vb->lock);
+		}
+		rcu_read_unlock();
+	}
+
+	__purge_vmap_area_lazy(&start, &end, 1, flush);
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+/**
+ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
+ * @mem: the pointer returned by vm_map_ram
+ * @count: the count passed to that vm_map_ram call (cannot unmap partial)
+ */
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+	unsigned long size = (unsigned long)count << PAGE_SHIFT; /* widen first */
+	unsigned long addr = (unsigned long)mem;
+
+	BUG_ON(!addr);
+	BUG_ON(addr < VMALLOC_START);
+	BUG_ON(addr > VMALLOC_END);
+	BUG_ON(addr & (PAGE_SIZE-1));
+
+	debug_check_no_locks_freed(mem, size);
+
+	if (likely(count <= VMAP_MAX_ALLOC))
+		vb_free(mem, size);
+	else
+		free_unmap_vmap_area_addr(addr);
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+/**
+ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
+ * @pages: an array of pointers to the pages to be mapped
+ * @count: number of pages
+ * @node: prefer to allocate data structures on this node
+ * @prot: memory protection to use. PAGE_KERNEL for regular RAM
+ * Returns: a pointer to the address that has been mapped, or NULL on failure
+ */
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+	unsigned long size = (unsigned long)count << PAGE_SHIFT; /* widen first */
+	unsigned long addr;
+	void *mem;
+
+	if (likely(count <= VMAP_MAX_ALLOC)) {
+		mem = vb_alloc(size, GFP_KERNEL);
+		if (IS_ERR(mem))
+			return NULL;
+		addr = (unsigned long)mem;
+	} else {
+		struct vmap_area *va;
+		va = alloc_vmap_area(size, PAGE_SIZE,
+				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+		if (IS_ERR(va))
+			return NULL;
+
+		addr = va->va_start;
+		mem = (void *)addr;
+	}
+	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+		vm_unmap_ram(mem, count);
+		return NULL;
+	}
+	return mem;
+}
+EXPORT_SYMBOL(vm_map_ram);
+
+void __init vmalloc_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct vmap_block_queue *vbq;
+
+		vbq = &per_cpu(vmap_block_queue, i);
+		spin_lock_init(&vbq->lock);
+		INIT_LIST_HEAD(&vbq->free);
+		INIT_LIST_HEAD(&vbq->dirty);
+		vbq->nr_dirty = 0;
+	}
+}
+
+void unmap_kernel_range(unsigned long addr, unsigned long size)
+{
+	unsigned long end = addr + size;
+	vunmap_page_range(addr, end);
+	flush_tlb_kernel_range(addr, end);
+}
+
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+{
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long end = addr + area->size - PAGE_SIZE;
+	int err;
+
+	err = vmap_page_range(addr, end, prot, *pages);
+	if (err > 0) {
+		*pages += err;
+		err = 0;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(map_vm_area);
+
+/*** Old vmalloc interfaces ***/
+DEFINE_RWLOCK(vmlist_lock);
+struct vm_struct *vmlist;
+
+static struct vm_struct *__get_vm_area_node(unsigned long size,
+		unsigned long flags, unsigned long start, unsigned long end,
+		int node, gfp_t gfp_mask, void *caller)
+{
+	struct vmap_area *va;	/* per-call state; must not be static */
+	struct vm_struct *area;
+	struct vm_struct *tmp, **p;
+	unsigned long align = 1;
 
 	BUG_ON(in_interrupt());
 	if (flags & VM_IOREMAP) {
@@ -232,13 +989,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 
 		align = 1ul << bit;
 	}
-	addr = ALIGN(start, align);
+
 	size = PAGE_ALIGN(size);
 	if (unlikely(!size))
 		return NULL;
 
 	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
-
 	if (unlikely(!area))
 		return NULL;
 
@@ -247,48 +1003,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 	 */
 	size += PAGE_SIZE;
 
-	write_lock(&vmlist_lock);
-	for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
-		if ((unsigned long)tmp->addr < addr) {
-			if((unsigned long)tmp->addr + tmp->size >= addr)
-				addr = ALIGN(tmp->size + 
-					     (unsigned long)tmp->addr, align);
-			continue;
-		}
-		if ((size + addr) < addr)
-			goto out;
-		if (size + addr <= (unsigned long)tmp->addr)
-			goto found;
-		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
-		if (addr > end - size)
-			goto out;
+	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
+	if (IS_ERR(va)) {
+		kfree(area);
+		return NULL;
 	}
-	if ((size + addr) < addr)
-		goto out;
-	if (addr > end - size)
-		goto out;
-
-found:
-	area->next = *p;
-	*p = area;
 
 	area->flags = flags;
-	area->addr = (void *)addr;
+	area->addr = (void *)va->va_start;
 	area->size = size;
 	area->pages = NULL;
 	area->nr_pages = 0;
 	area->phys_addr = 0;
 	area->caller = caller;
+	va->private = area;
+	va->flags |= VM_VM_AREA;
+
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+		if (tmp->addr >= area->addr)
+			break;
+	}
+	area->next = *p;
+	*p = area;
 	write_unlock(&vmlist_lock);
 
 	return area;
-
-out:
-	write_unlock(&vmlist_lock);
-	kfree(area);
-	if (printk_ratelimit())
-		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
-	return NULL;
 }
 
 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
@@ -328,39 +1068,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
 				  gfp_mask, __builtin_return_address(0));
 }
 
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__find_vm_area(const void *addr)
+static struct vm_struct *find_vm_area(const void *addr)
 {
-	struct vm_struct *tmp;
-
-	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
-		 if (tmp->addr == addr)
-			break;
-	}
-
-	return tmp;
-}
+	struct vmap_area *va;
 
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__remove_vm_area(const void *addr)
-{
-	struct vm_struct **p, *tmp;
+	va = find_vmap_area((unsigned long)addr);
+	if (va && va->flags & VM_VM_AREA)
+		return va->private;
 
-	for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
-		 if (tmp->addr == addr)
-			 goto found;
-	}
 	return NULL;
-
-found:
-	unmap_vm_area(tmp);
-	*p = tmp->next;
-
-	/*
-	 * Remove the guard page.
-	 */
-	tmp->size -= PAGE_SIZE;
-	return tmp;
 }
 
 /**
@@ -373,11 +1089,24 @@ found:
  */
 struct vm_struct *remove_vm_area(const void *addr)
 {
-	struct vm_struct *v;
-	write_lock(&vmlist_lock);
-	v = __remove_vm_area(addr);
-	write_unlock(&vmlist_lock);
-	return v;
+	struct vmap_area *va;
+
+	va = find_vmap_area((unsigned long)addr);
+	if (va && va->flags & VM_VM_AREA) {
+		struct vm_struct *vm = va->private;
+		struct vm_struct *tmp, **p;
+		free_unmap_vmap_area(va);
+		vm->size -= PAGE_SIZE;
+
+		write_lock(&vmlist_lock);
+		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
+			;
+		*p = tmp->next;
+		write_unlock(&vmlist_lock);
+
+		return vm;
+	}
+	return NULL;
 }
 
 static void __vunmap(const void *addr, int deallocate_pages)
@@ -487,6 +1216,8 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+			    int node, void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node, void *caller)
 {
@@ -613,10 +1344,8 @@ void *vmalloc_user(unsigned long size)
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 	if (ret) {
-		write_lock(&vmlist_lock);
-		area = __find_vm_area(ret);
+		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
-		write_unlock(&vmlist_lock);
 	}
 	return ret;
 }
@@ -696,10 +1425,8 @@ void *vmalloc_32_user(unsigned long size)
 
 	ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
 	if (ret) {
-		write_lock(&vmlist_lock);
-		area = __find_vm_area(ret);
+		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
-		write_unlock(&vmlist_lock);
 	}
 	return ret;
 }
@@ -800,26 +1527,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 	struct vm_struct *area;
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
-	int ret;
 
 	if ((PAGE_SIZE-1) & (unsigned long)addr)
 		return -EINVAL;
 
-	read_lock(&vmlist_lock);
-	area = __find_vm_area(addr);
+	area = find_vm_area(addr);
 	if (!area)
-		goto out_einval_locked;
+		return -EINVAL;
 
 	if (!(area->flags & VM_USERMAP))
-		goto out_einval_locked;
+		return -EINVAL;
 
 	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
-		goto out_einval_locked;
-	read_unlock(&vmlist_lock);
+		return -EINVAL;
 
 	addr += pgoff << PAGE_SHIFT;
 	do {
 		struct page *page = vmalloc_to_page(addr);
+		int ret;
+
 		ret = vm_insert_page(vma, uaddr, page);
 		if (ret)
 			return ret;
@@ -832,11 +1558,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
 	vma->vm_flags |= VM_RESERVED;
 
-	return ret;
-
-out_einval_locked:
-	read_unlock(&vmlist_lock);
-	return -EINVAL;
+	return 0;
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ff1a58e7c1..3b5860294bb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
+#include <linux/sysctl.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -78,7 +79,7 @@ struct scan_control {
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
 			struct zone *z, struct mem_cgroup *mem_cont,
-			int active);
+			int active, int file);
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -470,6 +471,85 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	return 0;
 }
 
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to appropriate LRU list.
+ * Page may still be unevictable for other reasons.
+ *
+ * lru_lock must not be held, interrupts must be enabled.
+ */
+#ifdef CONFIG_UNEVICTABLE_LRU
+void putback_lru_page(struct page *page)
+{
+	int lru;
+	int active = !!TestClearPageActive(page);
+	int was_unevictable = PageUnevictable(page);
+
+	VM_BUG_ON(PageLRU(page));
+
+redo:
+	ClearPageUnevictable(page);
+
+	if (page_evictable(page, NULL)) {
+		/*
+		 * Evictable pages can go through the per-cpu LRU cache.
+		 * If we race with the page becoming unevictable, the worst
+		 * case is an unevictable page on an [in]active list.
+		 * We know how to handle that.
+		 */
+		lru = active + page_is_file_cache(page);
+		lru_cache_add_lru(page, lru);
+	} else {
+		/*
+		 * Put unevictable pages directly on the zone's unevictable
+		 * list.
+		 */
+		lru = LRU_UNEVICTABLE;
+		add_page_to_unevictable_list(page);
+	}
+	mem_cgroup_move_lists(page, lru);
+
+	/*
+	 * The page's status can change while we move it between LRU lists.
+	 * An evictable page left on the unevictable list would never be
+	 * freed, so check again now that it has been added to the list.
+	 */
+	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
+		if (!isolate_lru_page(page)) {
+			put_page(page);	/* drop ref isolate_lru_page() took */
+			goto redo;
+		}
+		/* Isolation failed: someone else took this page off the LRU,
+		 * so it will either be freed or be put back on an LRU again.
+		 * There is nothing to do here.
+		 */
+	}
+
+	if (was_unevictable && lru != LRU_UNEVICTABLE)
+		count_vm_event(UNEVICTABLE_PGRESCUED);
+	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+		count_vm_event(UNEVICTABLE_PGCULLED);
+
+	put_page(page);		/* drop ref from isolate */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+void putback_lru_page(struct page *page)
+{
+	int was_active, list;
+	VM_BUG_ON(PageLRU(page));	/* must already be off the LRU */
+	was_active = !!TestClearPageActive(page);
+	list = was_active + page_is_file_cache(page);
+	lru_cache_add_lru(page, list);
+	mem_cgroup_move_lists(page, list);
+	put_page(page);		/* drop ref from isolate */
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -503,6 +583,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		sc->nr_scanned++;
 
+		if (unlikely(!page_evictable(page, NULL)))
+			goto cull_mlocked;
+
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
 
@@ -539,9 +622,19 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
 		 */
-		if (PageAnon(page) && !PageSwapCache(page))
+		if (PageAnon(page) && !PageSwapCache(page)) {
+			switch (try_to_munlock(page)) {
+			case SWAP_FAIL:		/* shouldn't happen */
+			case SWAP_AGAIN:
+				goto keep_locked;
+			case SWAP_MLOCK:
+				goto cull_mlocked;
+			case SWAP_SUCCESS:
+				; /* fall thru'; add to swap cache */
+			}
 			if (!add_to_swap(page, GFP_ATOMIC))
 				goto activate_locked;
+		}
 #endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
@@ -556,6 +649,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto activate_locked;
 			case SWAP_AGAIN:
 				goto keep_locked;
+			case SWAP_MLOCK:
+				goto cull_mlocked;
 			case SWAP_SUCCESS:
 				; /* try to free the page below */
 			}
@@ -602,7 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * possible for a page to have PageDirty set, but it is actually
 		 * clean (all its buffers are clean).  This happens if the
 		 * buffers were written out directly, with submit_bh(). ext3
-		 * will do this, as well as the blockdev mapping. 
+		 * will do this, as well as the blockdev mapping.
 		 * try_to_release_page() will discover that cleanness and will
 		 * drop the buffers and mark the page clean - it can be freed.
 		 *
@@ -637,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-		unlock_page(page);
+		/*
+		 * At this point, we have no other references and there is
+		 * no way to pick any more up (removed from LRU, removed
+		 * from pagecache). Can use non-atomic bitops now (and
+		 * we obviously don't have to worry about waking up a process
+		 * waiting on the page lock, because there are no references.
+		 */
+		__clear_page_locked(page);
 free_it:
 		nr_reclaimed++;
 		if (!pagevec_add(&freed_pvec, page)) {
@@ -646,14 +748,23 @@ free_it:
 		}
 		continue;
 
+cull_mlocked:
+		unlock_page(page);
+		putback_lru_page(page);
+		continue;
+
 activate_locked:
+		/* Not a candidate for swapping, so reclaim swap space. */
+		if (PageSwapCache(page) && vm_swap_full())
+			remove_exclusive_swap_page_ref(page);
+		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
 keep_locked:
 		unlock_page(page);
 keep:
 		list_add(&page->lru, &ret_pages);
-		VM_BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -677,7 +788,7 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, int mode)
+int __isolate_lru_page(struct page *page, int mode, int file)
 {
 	int ret = -EINVAL;
 
@@ -693,6 +804,17 @@ int __isolate_lru_page(struct page *page, int mode)
 	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
 		return ret;
 
+	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+		return ret;
+
+	/*
+	 * When this function is being called for lumpy reclaim, we
+	 * initially look into all LRU pages, active, inactive and
+	 * unevictable; only give shrink_page_list evictable pages.
+	 */
+	if (PageUnevictable(page))
+		return ret;
+
 	ret = -EBUSY;
 	if (likely(get_page_unless_zero(page))) {
 		/*
@@ -723,12 +845,13 @@ int __isolate_lru_page(struct page *page, int mode)
  * @scanned:	The number of pages that were scanned.
  * @order:	The caller's attempted allocation order
  * @mode:	One of the LRU isolation modes
+ * @file:	True [1] if isolating file [!anon] pages
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, int mode)
+		unsigned long *scanned, int order, int mode, int file)
 {
 	unsigned long nr_taken = 0;
 	unsigned long scan;
@@ -745,7 +868,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON(!PageLRU(page));
 
-		switch (__isolate_lru_page(page, mode)) {
+		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
 			list_move(&page->lru, dst);
 			nr_taken++;
@@ -788,10 +911,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				break;
 
 			cursor_page = pfn_to_page(pfn);
+
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode)) {
+			switch (__isolate_lru_page(cursor_page, mode, file)) {
 			case 0:
 				list_move(&cursor_page->lru, dst);
 				nr_taken++;
@@ -802,7 +926,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				/* else it is being freed elsewhere */
 				list_move(&cursor_page->lru, src);
 			default:
-				break;
+				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -816,40 +940,93 @@ static unsigned long isolate_pages_global(unsigned long nr,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active)
+					int active, int file)
 {
+	int lru = LRU_BASE;
 	if (active)
-		return isolate_lru_pages(nr, &z->active_list, dst,
-						scanned, order, mode);
-	else
-		return isolate_lru_pages(nr, &z->inactive_list, dst,
-						scanned, order, mode);
+		lru += LRU_ACTIVE;
+	if (file)
+		lru += LRU_FILE;
+	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
+								mode, !!file);
 }
 
 /*
  * clear_active_flags() is a helper for shrink_active_list(), clearing
  * any active bits from the pages in the list.
  */
-static unsigned long clear_active_flags(struct list_head *page_list)
+static unsigned long clear_active_flags(struct list_head *page_list,
+					unsigned int *count)
 {
 	int nr_active = 0;
+	int lru;
 	struct page *page;
 
-	list_for_each_entry(page, page_list, lru)
+	list_for_each_entry(page, page_list, lru) {
+		lru = page_is_file_cache(page);
 		if (PageActive(page)) {
+			lru += LRU_ACTIVE;
 			ClearPageActive(page);
 			nr_active++;
 		}
+		count[lru]++;
+	}
 
 	return nr_active;
 }
 
+/**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared.  If it was found on
+ * the active list, it will have PageActive set.  If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ *     fundamental difference from isolate_lru_pages (which is called
+ *     without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+int isolate_lru_page(struct page *page)
+{
+	struct zone *z;
+	int status = -EBUSY;
+
+	/* Unlocked early check; PageLRU is tested again under lru_lock. */
+	if (!PageLRU(page))
+		return status;
+	z = page_zone(page);
+	spin_lock_irq(&z->lru_lock);
+	if (PageLRU(page) && get_page_unless_zero(page)) {
+		int which = page_lru(page);
+		ClearPageLRU(page);
+		del_page_from_lru_list(z, page, which);
+		status = 0;
+	}
+	spin_unlock_irq(&z->lru_lock);
+	return status;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
 static unsigned long shrink_inactive_list(unsigned long max_scan,
-				struct zone *zone, struct scan_control *sc)
+			struct zone *zone, struct scan_control *sc,
+			int priority, int file)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
@@ -866,20 +1043,43 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_scan;
 		unsigned long nr_freed;
 		unsigned long nr_active;
+		unsigned int count[NR_LRU_LISTS] = { 0, };
+		int mode = ISOLATE_INACTIVE;
+
+		/*
+		 * If we need a large contiguous chunk of memory, or have
+		 * trouble getting a small set of contiguous pages, we
+		 * will reclaim both active and inactive pages.
+		 *
+		 * We use the same threshold as pageout congestion_wait below.
+		 */
+		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+			mode = ISOLATE_BOTH;
+		else if (sc->order && priority < DEF_PRIORITY - 2)
+			mode = ISOLATE_BOTH;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
-			     &page_list, &nr_scan, sc->order,
-			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
-					     ISOLATE_BOTH : ISOLATE_INACTIVE,
-				zone, sc->mem_cgroup, 0);
-		nr_active = clear_active_flags(&page_list);
+			     &page_list, &nr_scan, sc->order, mode,
+				zone, sc->mem_cgroup, 0, file);
+		nr_active = clear_active_flags(&page_list, count);
 		__count_vm_events(PGDEACTIVATE, nr_active);
 
-		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
-		__mod_zone_page_state(zone, NR_INACTIVE,
-						-(nr_taken - nr_active));
-		if (scan_global_lru(sc))
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE,
+						-count[LRU_ACTIVE_FILE]);
+		__mod_zone_page_state(zone, NR_INACTIVE_FILE,
+						-count[LRU_INACTIVE_FILE]);
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON,
+						-count[LRU_ACTIVE_ANON]);
+		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
+						-count[LRU_INACTIVE_ANON]);
+
+		if (scan_global_lru(sc)) {
 			zone->pages_scanned += nr_scan;
+			zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+			zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+			zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+			zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+		}
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
@@ -899,7 +1099,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			 * The attempt at page out may have made some
 			 * of the pages active, mark them inactive again.
 			 */
-			nr_active = clear_active_flags(&page_list);
+			nr_active = clear_active_flags(&page_list, count);
 			count_vm_events(PGDEACTIVATE, nr_active);
 
 			nr_freed += shrink_page_list(&page_list, sc,
@@ -924,14 +1124,24 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * Put back any unfreeable pages.
 		 */
 		while (!list_empty(&page_list)) {
+			int lru;
 			page = lru_to_page(&page_list);
 			VM_BUG_ON(PageLRU(page));
-			SetPageLRU(page);
 			list_del(&page->lru);
-			if (PageActive(page))
-				add_page_to_active_list(zone, page);
-			else
-				add_page_to_inactive_list(zone, page);
+			if (unlikely(!page_evictable(page, NULL))) {
+				spin_unlock_irq(&zone->lru_lock);
+				putback_lru_page(page);
+				spin_lock_irq(&zone->lru_lock);
+				continue;
+			}
+			SetPageLRU(page);
+			lru = page_lru(page);
+			add_page_to_lru_list(zone, page, lru);
+			mem_cgroup_move_lists(page, lru);
+			if (PageActive(page) && scan_global_lru(sc)) {
+				int file = !!page_is_file_cache(page);
+				zone->recent_rotated[file]++;
+			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -962,115 +1172,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
 
 static inline int zone_is_near_oom(struct zone *zone)
 {
-	return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
-				+ zone_page_state(zone, NR_INACTIVE))*3;
-}
-
-/*
- * Determine we should try to reclaim mapped pages.
- * This is called only when sc->mem_cgroup is NULL.
- */
-static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
-				int priority)
-{
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
-	long imbalance;
-	int reclaim_mapped = 0;
-	int prev_priority;
-
-	if (scan_global_lru(sc) && zone_is_near_oom(zone))
-		return 1;
-	/*
-	 * `distress' is a measure of how much trouble we're having
-	 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	if (scan_global_lru(sc))
-		prev_priority = zone->prev_priority;
-	else
-		prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
-
-	distress = 100 >> min(prev_priority, priority);
-
-	/*
-	 * The point of this algorithm is to decide when to start
-	 * reclaiming mapped memory instead of just pagecache.  Work out
-	 * how much memory
-	 * is mapped.
-	 */
-	if (scan_global_lru(sc))
-		mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
-				global_page_state(NR_ANON_PAGES)) * 100) /
-					vm_total_pages;
-	else
-		mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
-
-	/*
-	 * Now decide how much we really want to unmap some pages.  The
-	 * mapped ratio is downgraded - just because there's a lot of
-	 * mapped memory doesn't necessarily mean that page reclaim
-	 * isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start
-	 * going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm
-	 * altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
-
-	/*
-	 * If there's huge imbalance between active and inactive
-	 * (think active 100 times larger than inactive) we should
-	 * become more permissive, or the system will take too much
-	 * cpu before it start swapping during memory pressure.
-	 * Distress is about avoiding early-oom, this is about
-	 * making swappiness graceful despite setting it to low
-	 * values.
-	 *
-	 * Avoid div by zero with nr_inactive+1, and max resulting
-	 * value is vm_total_pages.
-	 */
-	if (scan_global_lru(sc)) {
-		imbalance  = zone_page_state(zone, NR_ACTIVE);
-		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
-	} else
-		imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
-
-	/*
-	 * Reduce the effect of imbalance if swappiness is low,
-	 * this means for a swappiness very low, the imbalance
-	 * must be much higher than 100 for this logic to make
-	 * the difference.
-	 *
-	 * Max temporary value is vm_total_pages*100.
-	 */
-	imbalance *= (vm_swappiness + 1);
-	imbalance /= 100;
-
-	/*
-	 * If not much of the ram is mapped, makes the imbalance
-	 * less relevant, it's high priority we refill the inactive
-	 * list with mapped pages only in presence of high ratio of
-	 * mapped pages.
-	 *
-	 * Max temporary value is vm_total_pages*100.
-	 */
-	imbalance *= mapped_ratio;
-	imbalance /= 100;
-
-	/* apply imbalance feedback to swap_tendency */
-	swap_tendency += imbalance;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped
-	 * memory onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
-	return reclaim_mapped;
+	return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
 }
 
 /*
@@ -1093,53 +1195,71 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
 
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-				struct scan_control *sc, int priority)
+			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
 	int pgdeactivate = 0;
 	unsigned long pgscanned;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
-	LIST_HEAD(l_inactive);	/* Pages to go onto the inactive_list */
-	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
+	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct pagevec pvec;
-	int reclaim_mapped = 0;
-
-	if (sc->may_swap)
-		reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
+	enum lru_list lru;
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 					ISOLATE_ACTIVE, zone,
-					sc->mem_cgroup, 1);
+					sc->mem_cgroup, 1, file);
 	/*
 	 * zone->pages_scanned is used for detect zone's oom
 	 * mem_cgroup remembers nr_scan by itself.
 	 */
-	if (scan_global_lru(sc))
+	if (scan_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
+		zone->recent_scanned[!!file] += pgmoved;
+	}
 
-	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
+	if (file)
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+	else
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
+	pgmoved = 0;
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
-		if (page_mapped(page)) {
-			if (!reclaim_mapped ||
-			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0, sc->mem_cgroup)) {
-				list_add(&page->lru, &l_active);
-				continue;
-			}
+
+		if (unlikely(!page_evictable(page, NULL))) {
+			putback_lru_page(page);
+			continue;
 		}
+
+		/* page_referenced clears PageReferenced */
+		if (page_mapping_inuse(page) &&
+		    page_referenced(page, 0, sc->mem_cgroup))
+			pgmoved++;
+
 		list_add(&page->lru, &l_inactive);
 	}
 
+	/*
+	 * Count referenced pages from currently used mappings as
+	 * rotated, even though they are moved to the inactive list.
+	 * This helps balance scan pressure between file and anonymous
+	 * pages in get_scan_ratio.
+	 */
+	zone->recent_rotated[!!file] += pgmoved;
+
+	/*
+	 * Move the pages to the [file or anon] inactive list.
+	 */
 	pagevec_init(&pvec, 1);
+
 	pgmoved = 0;
+	lru = LRU_BASE + file * LRU_FILE;
 	spin_lock_irq(&zone->lru_lock);
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
@@ -1149,11 +1269,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		VM_BUG_ON(!PageActive(page));
 		ClearPageActive(page);
 
-		list_move(&page->lru, &zone->inactive_list);
-		mem_cgroup_move_lists(page, false);
+		list_move(&page->lru, &zone->lru[lru].list);
+		mem_cgroup_move_lists(page, lru);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 			spin_unlock_irq(&zone->lru_lock);
 			pgdeactivate += pgmoved;
 			pgmoved = 0;
@@ -1163,104 +1283,189 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
 	pgdeactivate += pgmoved;
 	if (buffer_heads_over_limit) {
 		spin_unlock_irq(&zone->lru_lock);
 		pagevec_strip(&pvec);
 		spin_lock_irq(&zone->lru_lock);
 	}
-
-	pgmoved = 0;
-	while (!list_empty(&l_active)) {
-		page = lru_to_page(&l_active);
-		prefetchw_prev_lru_page(page, &l_active, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-
-		list_move(&page->lru, &zone->active_list);
-		mem_cgroup_move_lists(page, true);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-			pgmoved = 0;
-			spin_unlock_irq(&zone->lru_lock);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
+	if (vm_swap_full())
+		pagevec_swap_free(&pvec);
 
 	pagevec_release(&pvec);
 }
 
+static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+	struct zone *zone, struct scan_control *sc, int priority)
+{
+	int is_file = is_file_lru(lru);
+	int age_active = (lru == LRU_ACTIVE_FILE);
+
+	/* Active anon is aged only off the global LRU or if inactive is low. */
+	if (lru == LRU_ACTIVE_ANON)
+		age_active = !scan_global_lru(sc) || inactive_anon_is_low(zone);
+	/* Any list that is not aged is handled by the inactive-list scanner. */
+	if (!age_active)
+		return shrink_inactive_list(nr_to_scan, zone, sc, priority,
+					    is_file);
+
+	shrink_active_list(nr_to_scan, zone, sc, priority, is_file);
+	return 0;		/* shrink_active_list() reports no count */
+}
+
+/*
+ * Determine how aggressively the anon and file LRU lists should be
+ * scanned.  The relative value of each set of LRU lists is determined
+ * by looking at the fraction of the pages scanned we did rotate back
+ * onto the active list instead of evict.
+ *
+ * percent[0] specifies how much pressure to put on ram/swap backed
+ * memory, while percent[1] determines pressure on the file LRUs.
+ */
+static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
+					unsigned long *percent)
+{
+	unsigned long anon, file, free;
+	unsigned long anon_prio, file_prio;
+	unsigned long ap, fp;
+
+	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
+		zone_page_state(zone, NR_INACTIVE_ANON);
+	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
+		zone_page_state(zone, NR_INACTIVE_FILE);
+	free  = zone_page_state(zone, NR_FREE_PAGES);
+
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (nr_swap_pages <= 0) {
+		percent[0] = 0;
+		percent[1] = 100;
+		return;
+	}
+
+	/* Few file + free pages (<= pages_high): scan only anon pages. */
+	if (unlikely(file + free <= zone->pages_high)) {
+		percent[0] = 100;
+		percent[1] = 0;
+		return;
+	}
+
+	/*
+	 * OK, so we have swap space and a fair amount of page cache
+	 * pages.  We use the recently rotated / recently scanned
+	 * ratios to determine how valuable each cache is.
+	 *
+	 * Because workloads change over time (and to avoid overflow)
+	 * we keep these statistics as a floating average: both counters
+	 * are halved once recent_scanned exceeds a quarter of the pages
+	 * on the corresponding lists, weighing recent references more.
+	 * anon in [0], file in [1]
+	 */
+	if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+		spin_lock_irq(&zone->lru_lock);
+		zone->recent_scanned[0] /= 2;
+		zone->recent_rotated[0] /= 2;
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	if (unlikely(zone->recent_scanned[1] > file / 4)) {
+		spin_lock_irq(&zone->lru_lock);
+		zone->recent_scanned[1] /= 2;
+		zone->recent_rotated[1] /= 2;
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	/*
+	 * With swappiness at 100, anonymous and file have the same priority.
+	 * This scanning priority is essentially the inverse of IO cost.
+	 */
+	anon_prio = sc->swappiness;
+	file_prio = 200 - sc->swappiness;
+
+	/*
+	 * Pressure on each set of lists is its priority scaled by the
+	 * scanned/rotated ratio: the fewer of the scanned pages that were
+	 * rotated, the more pressure.  The +1 in each divisor avoids a
+	 * division by zero.
+	 */
+	ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
+	ap /= zone->recent_rotated[0] + 1;
+
+	fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
+	fp /= zone->recent_rotated[1] + 1;
+
+	/* Normalize to percentages; file gets whatever anon does not. */
+	percent[0] = 100 * ap / (ap + fp + 1);
+	percent[1] = 100 - percent[0];
+}
+
+
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
 {
-	unsigned long nr_active;
-	unsigned long nr_inactive;
+	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	unsigned long nr_reclaimed = 0;
+	unsigned long percent[2];	/* anon @ 0; file @ 1 */
+	enum lru_list l;
 
-	if (scan_global_lru(sc)) {
-		/*
-		 * Add one to nr_to_scan just to make sure that the kernel
-		 * will slowly sift through the active list.
-		 */
-		zone->nr_scan_active +=
-			(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
-		nr_active = zone->nr_scan_active;
-		zone->nr_scan_inactive +=
-			(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
-		nr_inactive = zone->nr_scan_inactive;
-		if (nr_inactive >= sc->swap_cluster_max)
-			zone->nr_scan_inactive = 0;
-		else
-			nr_inactive = 0;
-
-		if (nr_active >= sc->swap_cluster_max)
-			zone->nr_scan_active = 0;
-		else
-			nr_active = 0;
-	} else {
-		/*
-		 * This reclaim occurs not because zone memory shortage but
-		 * because memory controller hits its limit.
-		 * Then, don't modify zone reclaim related data.
-		 */
-		nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
-					zone, priority);
-
-		nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
-					zone, priority);
-	}
+	get_scan_ratio(zone, sc, percent);
 
+	for_each_evictable_lru(l) {
+		if (scan_global_lru(sc)) {
+			int file = is_file_lru(l);
+			int scan;
 
-	while (nr_active || nr_inactive) {
-		if (nr_active) {
-			nr_to_scan = min(nr_active,
-					(unsigned long)sc->swap_cluster_max);
-			nr_active -= nr_to_scan;
-			shrink_active_list(nr_to_scan, zone, sc, priority);
+			scan = zone_page_state(zone, NR_LRU_BASE + l);
+			if (priority) {
+				scan >>= priority;
+				scan = (scan * percent[file]) / 100;
+			}
+			zone->lru[l].nr_scan += scan;
+			nr[l] = zone->lru[l].nr_scan;
+			if (nr[l] >= sc->swap_cluster_max)
+				zone->lru[l].nr_scan = 0;
+			else
+				nr[l] = 0;
+		} else {
+			/*
+			 * This reclaim occurs not because zone memory shortage
+			 * but because memory controller hits its limit.
+			 * Don't modify zone reclaim related data.
+			 */
+			nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
+								priority, l);
 		}
+	}
 
-		if (nr_inactive) {
-			nr_to_scan = min(nr_inactive,
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+					nr[LRU_INACTIVE_FILE]) {
+		for_each_evictable_lru(l) {
+			if (nr[l]) {
+				nr_to_scan = min(nr[l],
 					(unsigned long)sc->swap_cluster_max);
-			nr_inactive -= nr_to_scan;
-			nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
-								sc);
+				nr[l] -= nr_to_scan;
+
+				nr_reclaimed += shrink_list(l, nr_to_scan,
+							zone, sc, priority);
+			}
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	else if (!scan_global_lru(sc))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
@@ -1321,7 +1526,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 
 	return nr_reclaimed;
 }
- 
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -1364,8 +1569,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_page_state(zone, NR_ACTIVE)
-					+ zone_page_state(zone, NR_INACTIVE);
+			lru_pages += zone_lru_pages(zone);
 		}
 	}
 
@@ -1555,6 +1759,14 @@ loop_again:
 			    priority != DEF_PRIORITY)
 				continue;
 
+			/*
+			 * Do some background aging of the anon list, to give
+			 * pages a chance to be referenced before reclaiming.
+			 */
+			if (inactive_anon_is_low(zone))
+				shrink_active_list(SWAP_CLUSTER_MAX, zone,
+							&sc, priority, 0);
+
 			if (!zone_watermark_ok(zone, order, zone->pages_high,
 					       0, 0)) {
 				end_zone = i;
@@ -1567,8 +1779,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_page_state(zone, NR_ACTIVE)
-					+ zone_page_state(zone, NR_INACTIVE);
+			lru_pages += zone_lru_pages(zone);
 		}
 
 		/*
@@ -1612,8 +1823,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				(zone_page_state(zone, NR_ACTIVE)
-				+ zone_page_state(zone, NR_INACTIVE)) * 6)
+						(zone_lru_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -1667,7 +1877,7 @@ out:
 
 /*
  * The background pageout daemon, started as a kernel thread
- * from the init process. 
+ * from the init process.
  *
  * This basically trickles out pages so that we have _some_
  * free memory available even if there is no other activity
@@ -1761,6 +1971,14 @@ void wakeup_kswapd(struct zone *zone, int order)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
+unsigned long global_lru_pages(void)
+{
+	return global_page_state(NR_ACTIVE_ANON)
+		+ global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_ANON)
+		+ global_page_state(NR_INACTIVE_FILE);
+}
+
 #ifdef CONFIG_PM
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
@@ -1774,6 +1992,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 {
 	struct zone *zone;
 	unsigned long nr_to_scan, ret = 0;
+	enum lru_list l;
 
 	for_each_zone(zone) {
 
@@ -1783,38 +2002,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
-		/* For pass = 0 we don't shrink the active list */
-		if (pass > 0) {
-			zone->nr_scan_active +=
-				(zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
-			if (zone->nr_scan_active >= nr_pages || pass > 3) {
-				zone->nr_scan_active = 0;
+		for_each_evictable_lru(l) {
+			/* For pass = 0, we don't shrink the active list */
+			if (pass == 0 &&
+				(l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
+				continue;
+
+			zone->lru[l].nr_scan +=
+				(zone_page_state(zone, NR_LRU_BASE + l)
+								>> prio) + 1;
+			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+				zone->lru[l].nr_scan = 0;
 				nr_to_scan = min(nr_pages,
-					zone_page_state(zone, NR_ACTIVE));
-				shrink_active_list(nr_to_scan, zone, sc, prio);
+					zone_page_state(zone,
+							NR_LRU_BASE + l));
+				ret += shrink_list(l, nr_to_scan, zone,
+								sc, prio);
+				if (ret >= nr_pages)
+					return ret;
 			}
 		}
-
-		zone->nr_scan_inactive +=
-			(zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
-		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
-			zone->nr_scan_inactive = 0;
-			nr_to_scan = min(nr_pages,
-				zone_page_state(zone, NR_INACTIVE));
-			ret += shrink_inactive_list(nr_to_scan, zone, sc);
-			if (ret >= nr_pages)
-				return ret;
-		}
 	}
 
 	return ret;
 }
 
-static unsigned long count_lru_pages(void)
-{
-	return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
-}
-
 /*
  * Try to free `nr_pages' of memory, system-wide, and return the number of
  * freed pages.
@@ -1840,7 +2052,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = count_lru_pages();
+	lru_pages = global_lru_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -1883,7 +2095,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					count_lru_pages());
+					global_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 			if (ret >= nr_pages)
 				goto out;
@@ -1900,7 +2112,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	if (!ret) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
 	}
@@ -2128,3 +2340,285 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ * @vma: the VMA in which the page is or will be mapped, may be NULL
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list.  The vma argument is !NULL when called from the
+ * fault path to determine how to instantiate a new page.
+ *
+ * Reasons page might not be evictable:
+ * (1) page's mapping marked unevictable
+ * (2) page is part of an mlocked VMA
+ *
+ */
+int page_evictable(struct page *page, struct vm_area_struct *vma)
+{
+
+	if (mapping_unevictable(page_mapping(page)))
+		return 0;
+
+	if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+		return 0;
+
+	return 1;
+}
+
+static void show_page_path(struct page *page)
+{
+	char buf[256];
+	if (page_is_file_cache(page)) {
+		struct address_space *mapping = page->mapping;
+		struct dentry *dentry;
+		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+		spin_lock(&mapping->i_mmap_lock);
+		dentry = d_find_alias(mapping->host);
+		printk(KERN_INFO "rescued: %s %lu\n",
+		       dentry_path(dentry, buf, 256), pgoff);
+		spin_unlock(&mapping->i_mmap_lock);
+	} else {
+#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
+		struct anon_vma *anon_vma;
+		struct vm_area_struct *vma;
+
+		anon_vma = page_lock_anon_vma(page);
+		if (!anon_vma)
+			return;
+
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			printk(KERN_INFO "rescued: anon %s\n",
+			       vma->vm_mm->owner->comm);
+			break;
+		}
+		page_unlock_anon_vma(anon_vma);
+#endif
+	}
+}
+
+
+/**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+ * @zone: zone page is in
+ *
+ * Checks a page for evictability and moves the page to the appropriate
+ * zone lru list.
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
+ */
+static void check_move_unevictable_page(struct page *page, struct zone *zone)
+{
+	VM_BUG_ON(PageActive(page));
+
+retry:
+	ClearPageUnevictable(page);
+	if (page_evictable(page, NULL)) {
+		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+
+		show_page_path(page);
+
+		__dec_zone_state(zone, NR_UNEVICTABLE);
+		list_move(&page->lru, &zone->lru[l].list);
+		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
+		__count_vm_event(UNEVICTABLE_PGRESCUED);
+	} else {
+		/*
+		 * rotate unevictable list
+		 */
+		SetPageUnevictable(page);
+		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		if (page_evictable(page, NULL))
+			goto retry;
+	}
+}
+
+/**
+ * scan_mapping_unevictable_pages - scan an address space for evictable pages
+ * @mapping: struct address_space to scan for evictable pages
+ *
+ * Scan all pages in mapping.  Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
+ */
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+	pgoff_t next = 0;
+	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+			 PAGE_CACHE_SHIFT;
+	struct zone *zone;
+	struct pagevec pvec;
+
+	if (mapping->nrpages == 0)
+		return;
+
+	pagevec_init(&pvec, 0);
+	while (next < end &&
+		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		int i;
+		int pg_scanned = 0;
+
+		zone = NULL;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
+			struct zone *pagezone = page_zone(page);
+
+			pg_scanned++;
+			if (page_index > next)
+				next = page_index;
+			next++;
+
+			if (pagezone != zone) {
+				if (zone)
+					spin_unlock_irq(&zone->lru_lock);
+				zone = pagezone;
+				spin_lock_irq(&zone->lru_lock);
+			}
+
+			if (PageLRU(page) && PageUnevictable(page))
+				check_move_unevictable_page(page, zone);
+		}
+		if (zone)
+			spin_unlock_irq(&zone->lru_lock);
+		pagevec_release(&pvec);
+
+		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+	}
+
+}
+
+/**
+ * scan_zone_unevictable_pages - check unevictable list for evictable pages
+ * @zone: zone of which to scan the unevictable list
+ *
+ * Scan @zone's unevictable LRU lists to check for pages that have become
+ * evictable.  Move those that have to @zone's inactive list where they
+ * become candidates for reclaim, unless shrink_inactive_zone() decides
+ * to reactivate them.  Pages that are still unevictable are rotated
+ * back onto @zone's unevictable list.
+ */
+#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
+void scan_zone_unevictable_pages(struct zone *zone)
+{
+	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
+	unsigned long scan;
+	unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
+
+	while (nr_to_scan > 0) {
+		unsigned long batch_size = min(nr_to_scan,
+						SCAN_UNEVICTABLE_BATCH_SIZE);
+
+		spin_lock_irq(&zone->lru_lock);
+		for (scan = 0;  scan < batch_size; scan++) {
+			struct page *page = lru_to_page(l_unevictable);
+
+			if (!trylock_page(page))
+				continue;
+
+			prefetchw_prev_lru_page(page, l_unevictable, flags);
+
+			if (likely(PageLRU(page) && PageUnevictable(page)))
+				check_move_unevictable_page(page, zone);
+
+			unlock_page(page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+
+		nr_to_scan -= batch_size;
+	}
+}
+
+
+/**
+ * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
+ *
+ * A really big hammer:  scan all zones' unevictable LRU lists to check for
+ * pages that have become evictable.  Move those back to the zones'
+ * inactive list where they become candidates for reclaim.
+ * This occurs when, e.g., we have unswappable pages on the unevictable lists,
+ * and we add swap to the system.  As such, it runs in the context of a task
+ * that has possibly/probably made some previously unevictable pages
+ * evictable.
+ */
+void scan_all_zones_unevictable_pages(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		scan_zone_unevictable_pages(zone);
+	}
+}
+
+/*
+ * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
+ * all nodes' unevictable lists for evictable pages
+ */
+unsigned long scan_unevictable_pages;
+
+int scan_unevictable_handler(struct ctl_table *table, int write,
+			   struct file *file, void __user *buffer,
+			   size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+
+	if (write && *(unsigned long *)table->data)
+		scan_all_zones_unevictable_pages();
+
+	scan_unevictable_pages = 0;
+	return 0;
+}
+
+/*
+ * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
+ * a specified node's per zone unevictable lists for evictable pages.
+ */
+
+static ssize_t read_scan_unevictable_node(struct sys_device *dev,
+					  struct sysdev_attribute *attr,
+					  char *buf)
+{
+	return sprintf(buf, "0\n");	/* always zero; should fit... */
+}
+
+static ssize_t write_scan_unevictable_node(struct sys_device *dev,
+					   struct sysdev_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
+	struct zone *zone;
+	unsigned long res;
+
+	/* strict_strtoul() returns 0 on success; the parsed value is in res */
+	if (strict_strtoul(buf, 10, &res) || !res)
+		return count;	/* unparsable or zero: nothing to do */
+
+	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+		if (!populated_zone(zone))
+			continue;
+		scan_zone_unevictable_pages(zone);
+	}
+	return count;	/* whole buffer consumed */
+}
+
+
+static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+			read_scan_unevictable_node,
+			write_scan_unevictable_node);
+
+int scan_unevictable_register_node(struct node *node)
+{
+	return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+void scan_unevictable_unregister_node(struct node *node)
+{
+	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d7826af2fb0..9343227c5c6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,14 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_free_pages",
-	"nr_inactive",
-	"nr_active",
+	"nr_inactive_anon",
+	"nr_active_anon",
+	"nr_inactive_file",
+	"nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"nr_unevictable",
+	"nr_mlock",
+#endif
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -675,6 +681,16 @@ static const char * const vmstat_text[] = {
 	"htlb_buddy_alloc_success",
 	"htlb_buddy_alloc_fail",
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"unevictable_pgs_culled",
+	"unevictable_pgs_scanned",
+	"unevictable_pgs_rescued",
+	"unevictable_pgs_mlocked",
+	"unevictable_pgs_munlocked",
+	"unevictable_pgs_cleared",
+	"unevictable_pgs_stranded",
+	"unevictable_pgs_mlockfreed",
+#endif
 #endif
 };
 
@@ -688,7 +704,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        min      %lu"
 		   "\n        low      %lu"
 		   "\n        high     %lu"
-		   "\n        scanned  %lu (a: %lu i: %lu)"
+		   "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
@@ -696,7 +712,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   zone->pages_low,
 		   zone->pages_high,
 		   zone->pages_scanned,
-		   zone->nr_scan_active, zone->nr_scan_inactive,
+		   zone->lru[LRU_ACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_INACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_ACTIVE_FILE].nr_scan,
+		   zone->lru[LRU_INACTIVE_FILE].nr_scan,
 		   zone->spanned_pages,
 		   zone->present_pages);
 
@@ -733,10 +752,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	seq_printf(m,
 		   "\n  all_unreclaimable: %u"
 		   "\n  prev_priority:     %i"
-		   "\n  start_pfn:         %lu",
+		   "\n  start_pfn:         %lu"
+		   "\n  inactive_ratio:    %u",
 			   zone_is_all_unreclaimable(zone),
 		   zone->prev_priority,
-		   zone->zone_start_pfn);
+		   zone->zone_start_pfn,
+		   zone->inactive_ratio);
 	seq_putc(m, '\n');
 }
 
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 519219480db..1041b7bd12e 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -4,10 +4,9 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 9pnet-objs := \
 	mod.o \
 	client.o \
-	conv.o \
 	error.o \
-	fcprint.o \
 	util.o \
+	protocol.o \
 	trans_fd.o \
 
 9pnet_virtio-objs := \
diff --git a/net/9p/client.c b/net/9p/client.c
index e053e06028a..bbac2f72b4d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -33,12 +33,9 @@
 #include <linux/uaccess.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
-
-static struct p9_fid *p9_fid_create(struct p9_client *clnt);
-static void p9_fid_destroy(struct p9_fid *fid);
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu);
+#include <net/9p/transport.h>
+#include "protocol.h"
 
 /*
   * Client Option Parsing (code inspired by NFS code)
@@ -59,6 +56,9 @@ static const match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
+
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @options: options string passed from mount
@@ -124,31 +124,585 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	return ret;
 }
 
+/**
+ * p9_tag_alloc - lookup/allocate a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ * This is a simple array lookup, but it will grow the request slots as
+ * necessary to accommodate transaction ids which did not previously have
+ * a slot.  Growth of the slot table is done under the client spinlock.
+ *
+ * Returns the request, or ERR_PTR(-ENOMEM).  On failure the client lock
+ * is not held and no half-initialised buffers are left in the slot.
+ */
+
+static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+{
+	unsigned long flags;
+	int row, col;
+	struct p9_req_t *req;
+
+	/* slots are indexed by tag + 1; the wire tag is restored below */
+	tag++;
+
+	if (tag >= c->max_tag) {
+		spin_lock_irqsave(&c->lock, flags);
+		/* recheck under the lock; add rows in order, none skipped */
+		while (tag >= c->max_tag) {
+			row = c->max_tag / P9_ROW_MAXTAG;
+			c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
+					sizeof(struct p9_req_t), GFP_ATOMIC);
+			if (!c->reqs[row]) {
+				printk(KERN_ERR "Couldn't grow tag array\n");
+				spin_unlock_irqrestore(&c->lock, flags);
+				return ERR_PTR(-ENOMEM);
+			}
+			for (col = 0; col < P9_ROW_MAXTAG; col++) {
+				c->reqs[row][col].status = REQ_STATUS_IDLE;
+				c->reqs[row][col].tc = NULL;
+			}
+			c->max_tag += P9_ROW_MAXTAG;
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+	}
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	req = &c->reqs[row][col];
+	if (!req->tc) {
+		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+		if (!req->wq) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		init_waitqueue_head(req->wq);
+		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		if ((!req->tc) || (!req->rc)) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			kfree(req->tc);
+			kfree(req->rc);
+			kfree(req->wq);
+			req->tc = NULL;
+			req->rc = NULL;
+			req->wq = NULL;
+			return ERR_PTR(-ENOMEM);
+		}
+		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
+		req->tc->capacity = c->msize;
+		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
+		req->rc->capacity = c->msize;
+	}
+
+	p9pdu_reset(req->tc);
+	p9pdu_reset(req->rc);
+	req->flush_tag = 0;
+	req->tc->tag = tag-1;
+	req->status = REQ_STATUS_ALLOC;
+
+	return req;
+}
+
+/**
+ * p9_tag_lookup - lookup a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ */
+
+struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
+{
+	int row, col;
+
+	/* This looks up the original request by tag so we know which
+	 * buffer to read the data into */
+	tag++;
+
+	BUG_ON(tag >= c->max_tag);
+
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	return &c->reqs[row][col];
+}
+EXPORT_SYMBOL(p9_tag_lookup);
+
+/**
+ * p9_tag_init - setup tags structure and contents
+ * @c: client structure whose tag pool is set up
+ *
+ * This initializes the tags structure for each client instance.
+ *
+ */
+
+static int p9_tag_init(struct p9_client *c)
+{
+	int err = 0;
+
+	c->tagpool = p9_idpool_create();
+	if (IS_ERR(c->tagpool)) {
+		err = PTR_ERR(c->tagpool);
+		c->tagpool = NULL;
+		goto error;
+	}
+
+	p9_idpool_get(c->tagpool); /* reserve tag 0 */
+
+	c->max_tag = 0;
+error:
+	return err;
+}
 
 /**
- * p9_client_rpc - sends 9P request and waits until a response is available.
- *      The function can be interrupted.
- * @c: client data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
+ * p9_tag_cleanup - cleans up tags structure and reclaims resources
+ * @c: client structure whose tag state is torn down
+ *
+ * This frees resources associated with the tags structure
+ *
  */
+static void p9_tag_cleanup(struct p9_client *c)
+{
+	int row, col;
+
+	/* make sure all requests are idle; otherwise free nothing */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
+			if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
+				P9_DPRINTK(P9_DEBUG_MUX,
+				  "Attempting to cleanup non-free tag %d,%d\n",
+				  row, col);
+				/* TODO: delay execution of cleanup */
+				return;
+			}
+		}
+	}
+
+	if (c->tagpool)
+		p9_idpool_destroy(c->tagpool);
+
+	/* free per-slot wait queue and fcall buffers, then each row */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
+			kfree(c->reqs[row][col].wq);
+			kfree(c->reqs[row][col].tc);
+			kfree(c->reqs[row][col].rc);
+		}
+		kfree(c->reqs[row]);
+	}
+	c->max_tag = 0;
+}
+
+/**
+ * p9_free_req - free a request and clean-up as necessary
+ * @c: client state
+ * @r: request to release
+ *
+ */
+
+static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
+{
+	int tag = r->tc->tag;
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+
+	r->status = REQ_STATUS_IDLE;
+	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
+		p9_idpool_put(tag, c->tagpool);
+
+	/* if this was a flush request we have to free response fcall */
+	if (r->rc->id == P9_RFLUSH) {
+		kfree(r->tc);
+		kfree(r->rc);
+	}
+}
+
+/**
+ * p9_client_cb - call back from transport to client
+ * @c: client state
+ * @req: request received
+ *
+ */
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+{
+	struct p9_req_t *other_req;
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+
+	if (req->status == REQ_STATUS_ERROR)
+		wake_up(req->wq);
+
+	if (req->flush_tag) { 			/* flush receive path */
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		other_req = p9_tag_lookup(c, req->flush_tag);
+		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
+			spin_unlock_irqrestore(&c->lock, flags);
+		else {
+			other_req->status = REQ_STATUS_FLSHD;
+			spin_unlock_irqrestore(&c->lock, flags);
+			wake_up(other_req->wq);
+		}
+		p9_free_req(c, req);
+	} else { 				/* normal receive path */
+		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status != REQ_STATUS_FLSHD)
+			req->status = REQ_STATUS_RCVD;
+		spin_unlock_irqrestore(&c->lock, flags);
+		wake_up(req->wq);
+		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+	}
+}
+EXPORT_SYMBOL(p9_client_cb);
+
+/**
+ * p9_parse_header - parse header arguments out of a packet
+ * @pdu: packet to parse
+ * @size: size of packet
+ * @type: type of request
+ * @tag: tag of packet
+ * @rewind: set if we need to rewind offset afterwards
+ */
+
 int
-p9_client_rpc(struct p9_client *c, struct p9_fcall *tc,
-	struct p9_fcall **rc)
+p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
+								int rewind)
 {
-	return c->trans->rpc(c->trans, tc, rc);
+	int8_t r_type;
+	int16_t r_tag;
+	int32_t r_size;
+	int offset = pdu->offset;
+	int err;
+
+	pdu->offset = 0;
+	if (pdu->size == 0)
+		pdu->size = 7;
+
+	err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag);
+	if (err)
+		goto rewind_and_exit;
+
+	pdu->size = r_size;
+	pdu->id = r_type;
+	pdu->tag = r_tag;
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
+
+	if (type)
+		*type = r_type;
+	if (tag)
+		*tag = r_tag;
+	if (size)
+		*size = r_size;
+
+
+rewind_and_exit:
+	if (rewind)
+		pdu->offset = offset;
+	return err;
 }
+EXPORT_SYMBOL(p9_parse_header);
+
+/**
+ * p9_check_errors - check 9p packet for error return and process it
+ * @c: current client instance
+ * @req: request to parse and check for error conditions
+ *
+ * returns error code if one is discovered, otherwise returns 0
+ *
+ * this will have to be more complicated if we have multiple
+ * error packet types
+ */
+
+static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+{
+	int8_t type;
+	int err;
+
+	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+		return err;
+	}
+
+	if (type == P9_RERROR) {
+		int ecode = 0;	/* presumably parsed only if c->dotu */
+		char *ename;
+
+		err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode);
+		if (err) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
+									err);
+			return err;
+		}
+
+		if (c->dotu)
+			err = -ecode;
+
+		if (!err) {
+			err = p9_errstr2errno(ename, strlen(ename));
+
+			/* string match failed */
+			if (!err)
+				err = -ESERVERFAULT;
+		}
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+
+		kfree(ename);
+	} else
+		err = 0;
+
+	return err;
+}
+
+/**
+ * p9_client_flush - flush (cancel) a request
+ * @c: client state
+ * @oldreq: request to cancel
+ *
+ * This sends a flush for a particular request and links
+ * the flush request to the original request.  The current
+ * code only supports a single flush request although the protocol
+ * allows for multiple flush requests to be sent for a single request.
+ *
+ */
+
+static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+{
+	struct p9_req_t *req;
+	int16_t oldtag;
+	int err;
+
+	err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+	if (err)
+		return err;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
+
+	req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->flush_tag = oldtag;
+
+	/* we don't free anything here because RPC isn't complete */
+	return 0;
+}
+
+/**
+ * p9_client_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+{
+	va_list ap;
+	int tag, err;
+	struct p9_req_t *req;
+	unsigned long flags;
+	int sigpending;
+	int flushed = 0;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
+
+	if (c->status != Connected)
+		return ERR_PTR(-EIO);
+
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	} else
+		sigpending = 0;
+
+	tag = P9_NOTAG;
+	if (type != P9_TVERSION) {
+		tag = p9_idpool_get(c->tagpool);
+		if (tag < 0)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	req = p9_tag_alloc(c, tag);
+	if (IS_ERR(req))
+		return req;
+
+	/* marshall the data */
+	p9pdu_prepare(req->tc, tag, type);
+	va_start(ap, fmt);
+	err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap);
+	va_end(ap);
+	p9pdu_finalize(req->tc);
+
+	err = c->trans_mod->request(c, req);
+	if (err < 0) {
+		c->status = Disconnected;
+		goto reterr;
+	}
+
+	/* if it was a flush we just transmitted, return our tag */
+	if (type == P9_TFLUSH)
+		return req;
+again:
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
+	err = wait_event_interruptible(*req->wq,
+						req->status >= REQ_STATUS_RCVD);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
+						req->wq, tag, err, flushed);
+
+	if (req->status == REQ_STATUS_ERROR) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+		err = req->t_err;
+	} else if (err == -ERESTARTSYS && flushed) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
+		goto again;
+	} else if (req->status == REQ_STATUS_FLSHD) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
+		err = -ERESTARTSYS;
+	}
+
+	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status == REQ_STATUS_SENT)
+			req->status = REQ_STATUS_FLSH;
+		spin_unlock_irqrestore(&c->lock, flags);
+		sigpending = 1;
+		flushed = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+
+		if (c->trans_mod->cancel(c, req)) {
+			err = p9_client_flush(c, req);
+			if (err == 0)
+				goto again;
+		}
+	}
+
+	if (sigpending) {
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+
+	if (err < 0)
+		goto reterr;
+
+	err = p9_check_errors(c, req);
+	if (!err) {
+		P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
+		return req;
+	}
+
+reterr:
+	P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
+									err);
+	p9_free_req(c, req);
+	return ERR_PTR(err);
+}
+
+static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+{
+	int err;
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
+	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+	if (!fid)
+		return ERR_PTR(-ENOMEM);
+
+	fid->fid = p9_idpool_get(clnt->fidpool);
+	if (fid->fid < 0) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	memset(&fid->qid, 0, sizeof(struct p9_qid));
+	fid->mode = -1;
+	fid->rdir_fpos = 0;
+	fid->uid = current->fsuid;
+	fid->clnt = clnt;
+	fid->aux = NULL;
+
+	spin_lock(&clnt->lock);
+	list_add(&fid->flist, &clnt->fidlist);
+	spin_unlock(&clnt->lock);
+
+	return fid;
+
+error:
+	kfree(fid);
+	return ERR_PTR(err);
+}
+
+static void p9_fid_destroy(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
+	clnt = fid->clnt;
+	p9_idpool_put(fid->fid, clnt->fidpool);
+	spin_lock(&clnt->lock);
+	list_del(&fid->flist);
+	spin_unlock(&clnt->lock);
+	kfree(fid);
+}
+
+/* p9_client_version - negotiate protocol version and msize with server */
+int p9_client_version(struct p9_client *c)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	char *version = NULL;	/* freed at error: even if parsing fails */
+	int msize;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n",
+							c->msize, c->dotu);
+	req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize,
+				c->dotu ? "9P2000.u" : "9P2000");
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
+	/* strncmp() stops at the NUL: short replies are not overread */
+	if (!strncmp(version, "9P2000.u", 8))
+		c->dotu = 1;
+	else if (!strncmp(version, "9P2000", 6))
+		c->dotu = 0;
+	else {
+		err = -EREMOTEIO;
+		goto error;
+	}
+	if (msize < c->msize)
+		c->msize = msize;
+
+error:
+	kfree(version);
+	p9_free_req(c, req);
+	return err;
+}
+EXPORT_SYMBOL(p9_client_version);
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
-	int err, n;
+	int err;
 	struct p9_client *clnt;
-	struct p9_fcall *tc, *rc;
-	struct p9_str *version;
 
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
@@ -164,6 +718,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
+	p9_tag_init(clnt);
+
 	err = parse_opts(options, clnt);
 	if (err < 0)
 		goto error;
@@ -175,53 +731,23 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n",
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
 		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
 
-
-	clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize,
-								clnt->dotu);
-	if (IS_ERR(clnt->trans)) {
-		err = PTR_ERR(clnt->trans);
-		clnt->trans = NULL;
+	err = clnt->trans_mod->create(clnt, dev_name, options);
+	if (err)
 		goto error;
-	}
 
 	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
 
-	tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000");
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9_client_version(clnt);
 	if (err)
 		goto error;
 
-	version = &rc->params.rversion.version;
-	if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8))
-		clnt->dotu = 1;
-	else if (version->len == 6 && !memcmp(version->str, "9P2000", 6))
-		clnt->dotu = 0;
-	else {
-		err = -EREMOTEIO;
-		goto error;
-	}
-
-	n = rc->params.rversion.msize;
-	if (n < clnt->msize)
-		clnt->msize = n;
-
-	kfree(tc);
-	kfree(rc);
 	return clnt;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	p9_client_destroy(clnt);
 	return ERR_PTR(err);
 }
@@ -231,13 +757,10 @@ void p9_client_destroy(struct p9_client *clnt)
 {
 	struct p9_fid *fid, *fidptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt);
 
-	if (clnt->trans) {
-		clnt->trans->close(clnt->trans);
-		kfree(clnt->trans);
-		clnt->trans = NULL;
-	}
+	if (clnt->trans_mod)
+		clnt->trans_mod->close(clnt);
 
 	v9fs_put_trans(clnt->trans_mod);
 
@@ -247,6 +770,8 @@ void p9_client_destroy(struct p9_client *clnt)
 	if (clnt->fidpool)
 		p9_idpool_destroy(clnt->fidpool);
 
+	p9_tag_cleanup(clnt);
+
 	kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
@@ -254,7 +779,7 @@ EXPORT_SYMBOL(p9_client_destroy);
 void p9_client_disconnect(struct p9_client *clnt)
 {
 	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	clnt->trans->status = Disconnected;
+	clnt->status = Disconnected;
 }
 EXPORT_SYMBOL(p9_client_disconnect);
 
@@ -262,14 +787,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 	char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_fid *fid;
+	struct p9_qid qid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n",
-		clnt, afid?afid->fid:-1, uname, aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+					afid ? afid->fid : -1, uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
 	fid = p9_fid_create(clnt);
 	if (IS_ERR(fid)) {
@@ -278,73 +802,77 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 		goto error;
 	}
 
-	tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname,
-		n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid,
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
 		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
 
-	memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
+	memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+
+	p9_free_req(clnt, req);
 	return fid;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid)
 		p9_fid_destroy(fid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname,
-	u32 n_uname, char *aname)
+struct p9_fid *
+p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
-	struct p9_fid *fid;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	struct p9_fid *afid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname,
-									aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
-	fid = p9_fid_create(clnt);
-	if (IS_ERR(fid)) {
-		err = PTR_ERR(fid);
-		fid = NULL;
+	afid = p9_fid_create(clnt);
+	if (IS_ERR(afid)) {
+		err = PTR_ERR(afid);
+		afid = NULL;
 		goto error;
 	}
 
-	tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
 		goto error;
+	}
 
-	memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
-	return fid;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
+
+	memmove(&afid->qid, &qid, sizeof(struct p9_qid));
+	p9_free_req(clnt, req);
+	return afid;
 
 error:
-	kfree(tc);
-	kfree(rc);
-	if (fid)
-		p9_fid_destroy(fid);
+	if (afid)
+		p9_fid_destroy(afid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_auth);
@@ -353,15 +881,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	int clone)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
 	struct p9_fid *fid;
+	struct p9_qid *wqids;
+	struct p9_req_t *req;
+	int16_t nwqids, count;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n",
-		oldfid->fid, nwname, wnames?wnames[0]:NULL);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = oldfid->clnt;
 	if (clone) {
 		fid = p9_fid_create(clnt);
@@ -375,53 +901,49 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	} else
 		fid = oldfid;
 
-	tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n",
+		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
+
+	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
+								nwname, wnames);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
 	if (err) {
-		if (rc && rc->id == P9_RWALK)
-			goto clunk_fid;
-		else
-			goto error;
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
+		goto clunk_fid;
 	}
+	p9_free_req(clnt, req);
 
-	if (rc->params.rwalk.nwqid != nwname) {
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
+
+	if (nwqids != nwname) {
 		err = -ENOENT;
 		goto clunk_fid;
 	}
 
+	for (count = 0; count < nwqids; count++)
+		P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
+			count, wqids[count].type, wqids[count].path,
+			wqids[count].version);
+
 	if (nwname)
-		memmove(&fid->qid,
-			&rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1],
-			sizeof(struct p9_qid));
+		memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
 	else
 		fid->qid = oldfid->qid;
 
-	kfree(tc);
-	kfree(rc);
 	return fid;
 
 clunk_fid:
-	kfree(tc);
-	kfree(rc);
-	rc = NULL;
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	p9_client_rpc(clnt, tc, &rc);
+	p9_client_clunk(fid);
+	fid = NULL;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid && (fid != oldfid))
 		p9_fid_destroy(fid);
 
@@ -432,35 +954,39 @@ EXPORT_SYMBOL(p9_client_walk);
 int p9_client_open(struct p9_fid *fid, int mode)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_topen(fid->fid, mode);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+free_and_error:
+	p9_free_req(clnt, req);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_open);
@@ -469,37 +995,41 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 		     char *extension)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid,
-		name, perm, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
+						fid->fid, name, perm, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_tcreate(fid->fid, name, perm, mode, extension,
-							       clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
+				mode, extension);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+free_and_error:
+	p9_free_req(clnt, req);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_fcreate);
@@ -507,31 +1037,25 @@ EXPORT_SYMBOL(p9_client_fcreate);
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_clunk);
@@ -539,157 +1063,41 @@ EXPORT_SYMBOL(p9_client_clunk);
 int p9_client_remove(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tremove(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_remove);
-
-int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
-
-		memmove(data, rc->params.rread.data, n);
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
-
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_read);
-
-int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_twrite(fid->fid, offset, rsize, data);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
-
-	return total;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_write);
+EXPORT_SYMBOL(p9_client_remove);
 
 int
-p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count)
+p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
+								u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid,
 					(long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -697,63 +1105,57 @@ p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count)
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
+	if (count < rsize)
+		rsize = count;
 
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
+	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
+	if (data) {
+		memmove(data, dataptr, count);
+		data += count;
+	}
 
-		err = copy_to_user(data, rc->params.rread.data, n);
+	if (udata) {
+		err = copy_to_user(udata, dataptr, count);
 		if (err) {
 			err = -EFAULT;
-			goto error;
+			goto free_and_error;
 		}
+	}
 
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uread);
+EXPORT_SYMBOL(p9_client_read);
 
 int
-p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
-								   u32 count)
+p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
+							u64 offset, u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
+				fid->fid, (long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -761,325 +1163,114 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_twrite_u(fid->fid, offset, rsize, data);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
+	if (count < rsize)
+		rsize = count;
+	if (data)
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
+								rsize, data);
+	else
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
+								rsize, udata);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
 
-		n = rc->params.rread.count;
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uwrite);
-
-int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int n, total;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	n = 0;
-	total = 0;
-	while (count) {
-		n = p9_client_read(fid, data, offset, count);
-		if (n <= 0)
-			break;
-
-		data += n;
-		offset += n;
-		count -= n;
-		total += n;
-	}
-
-	if (n < 0)
-		total = n;
-
-	return total;
-}
-EXPORT_SYMBOL(p9_client_readn);
+EXPORT_SYMBOL(p9_client_write);
 
-struct p9_stat *p9_client_stat(struct p9_fid *fid)
+struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
-	struct p9_stat *ret;
+	struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL);
+	struct p9_req_t *req;
+	u16 ignored;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tstat(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto error;
-
-	ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);	/* req is valid here, release it */
+		goto error;		/* frees ret, returns ERR_PTR(err) */
 	}
 
-	kfree(tc);
-	kfree(rc);
-	return ret;
-
+	P9_DPRINTK(P9_DEBUG_9P,
+		"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"<<<    mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"<<<    uid=%d gid=%d n_muid=%d\n",
+		ret->size, ret->type, ret->dev, ret->qid.type,
+		ret->qid.path, ret->qid.version, ret->mode,
+		ret->atime, ret->mtime, ret->length, ret->name,
+		ret->uid, ret->gid, ret->muid, ret->extension,
+		ret->n_uid, ret->n_gid, ret->n_muid);
+	p9_free_req(clnt, req);
+	return ret;
+
 error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
-	return ERR_PTR(err);
+	kfree(ret);		/* never return an unfilled buffer */
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_stat);
 
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_client *clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P,
+		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"     mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"     name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"     uid=%d gid=%d n_muid=%d\n",
+		wst->size, wst->type, wst->dev, wst->qid.type,
+		wst->qid.path, wst->qid.version, wst->mode,
+		wst->atime, wst->mtime, wst->length, wst->name,
+		wst->uid, wst->gid, wst->muid, wst->extension,
+		wst->n_uid, wst->n_gid, wst->n_muid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_twstat(fid->fid, wst, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
-	}
-
-	err = p9_client_rpc(clnt, tc, &rc);
-
-done:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_wstat);
-
-struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset)
-{
-	int err, n, m;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-	struct p9_stat st, *ret;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid,
-						(long long unsigned) offset);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
-	clnt = fid->clnt;
-
-	/* if the offset is below or above the current response, free it */
-	if (offset < fid->rdir_fpos || (fid->rdir_fcall &&
-		offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) {
-		fid->rdir_pos = 0;
-		if (fid->rdir_fcall)
-			fid->rdir_fpos += fid->rdir_fcall->params.rread.count;
-
-		kfree(fid->rdir_fcall);
-		fid->rdir_fcall = NULL;
-		if (offset < fid->rdir_fpos)
-			fid->rdir_fpos = 0;
-	}
-
-	if (!fid->rdir_fcall) {
-		n = fid->iounit;
-		if (!n || n > clnt->msize-P9_IOHDRSZ)
-			n = clnt->msize - P9_IOHDRSZ;
-
-		while (1) {
-			if (fid->rdir_fcall) {
-				fid->rdir_fpos +=
-					fid->rdir_fcall->params.rread.count;
-				kfree(fid->rdir_fcall);
-				fid->rdir_fcall = NULL;
-			}
-
-			tc = p9_create_tread(fid->fid, fid->rdir_fpos, n);
-			if (IS_ERR(tc)) {
-				err = PTR_ERR(tc);
-				tc = NULL;
-				goto error;
-			}
-
-			err = p9_client_rpc(clnt, tc, &rc);
-			if (err)
-				goto error;
-
-			n = rc->params.rread.count;
-			if (n == 0)
-				goto done;
-
-			fid->rdir_fcall = rc;
-			rc = NULL;
-			if (offset >= fid->rdir_fpos &&
-						offset < fid->rdir_fpos+n)
-				break;
-		}
-
-		fid->rdir_pos = 0;
-	}
-
-	m = offset - fid->rdir_fpos;
-	if (m < 0)
-		goto done;
-
-	n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m,
-		fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu);
-
-	if (!n) {
-		err = -EIO;
-		goto error;
-	}
-
-	fid->rdir_pos += n;
-	st.size = n;
-	ret = p9_clone_stat(&st, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
-	}
-
-done:
-	kfree(tc);
-	kfree(rc);
-	return ret;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_dirread);
-
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
-{
-	int n;
-	char *p;
-	struct p9_stat *ret;
-
-	n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len +
-		st->muid.len;
-
-	if (dotu)
-		n += st->extension.len;
-
-	ret = kmalloc(n, GFP_KERNEL);
-	if (!ret)
-		return ERR_PTR(-ENOMEM);
-
-	memmove(ret, st, sizeof(struct p9_stat));
-	p = ((char *) ret) + sizeof(struct p9_stat);
-	memmove(p, st->name.str, st->name.len);
-	ret->name.str = p;
-	p += st->name.len;
-	memmove(p, st->uid.str, st->uid.len);
-	ret->uid.str = p;
-	p += st->uid.len;
-	memmove(p, st->gid.str, st->gid.len);
-	ret->gid.str = p;
-	p += st->gid.len;
-	memmove(p, st->muid.str, st->muid.len);
-	ret->muid.str = p;
-	p += st->muid.len;
-
-	if (dotu) {
-		memmove(p, st->extension.str, st->extension.len);
-		ret->extension.str = p;
-		p += st->extension.len;
-	}
-
-	return ret;
-}
-
-static struct p9_fid *p9_fid_create(struct p9_client *clnt)
-{
-	int err;
-	struct p9_fid *fid;
-
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
-	if (!fid)
-		return ERR_PTR(-ENOMEM);
-
-	fid->fid = p9_idpool_get(clnt->fidpool);
-	if (fid->fid < 0) {
-		err = -ENOSPC;
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	memset(&fid->qid, 0, sizeof(struct p9_qid));
-	fid->mode = -1;
-	fid->rdir_fpos = 0;
-	fid->rdir_pos = 0;
-	fid->rdir_fcall = NULL;
-	fid->uid = current->fsuid;
-	fid->clnt = clnt;
-	fid->aux = NULL;
-
-	spin_lock(&clnt->lock);
-	list_add(&fid->flist, &clnt->fidlist);
-	spin_unlock(&clnt->lock);
-
-	return fid;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 error:
-	kfree(fid);
-	return ERR_PTR(err);
-}
-
-static void p9_fid_destroy(struct p9_fid *fid)
-{
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
-	clnt = fid->clnt;
-	p9_idpool_put(fid->fid, clnt->fidpool);
-	spin_lock(&clnt->lock);
-	list_del(&fid->flist);
-	spin_unlock(&clnt->lock);
-	kfree(fid->rdir_fcall);
-	kfree(fid);
+	return err;
 }
+EXPORT_SYMBOL(p9_client_wstat);
diff --git a/net/9p/conv.c b/net/9p/conv.c
deleted file mode 100644
index 5ad3a3bd73b..00000000000
--- a/net/9p/conv.c
+++ /dev/null
@@ -1,1054 +0,0 @@
-/*
- * net/9p/conv.c
- *
- * 9P protocol conversion functions
- *
- *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include <linux/uaccess.h>
-#include <net/9p/9p.h>
-
-/*
- * Buffer to help with string parsing
- */
-struct cbuf {
-	unsigned char *sp;
-	unsigned char *p;
-	unsigned char *ep;
-};
-
-static inline void buf_init(struct cbuf *buf, void *data, int datalen)
-{
-	buf->sp = buf->p = data;
-	buf->ep = data + datalen;
-}
-
-static inline int buf_check_overflow(struct cbuf *buf)
-{
-	return buf->p > buf->ep;
-}
-
-static int buf_check_size(struct cbuf *buf, int len)
-{
-	if (buf->p + len > buf->ep) {
-		if (buf->p < buf->ep) {
-			P9_EPRINTK(KERN_ERR,
-				"buffer overflow: want %d has %d\n", len,
-				(int)(buf->ep - buf->p));
-			dump_stack();
-			buf->p = buf->ep + 1;
-		}
-
-		return 0;
-	}
-
-	return 1;
-}
-
-static void *buf_alloc(struct cbuf *buf, int len)
-{
-	void *ret = NULL;
-
-	if (buf_check_size(buf, len)) {
-		ret = buf->p;
-		buf->p += len;
-	}
-
-	return ret;
-}
-
-static void buf_put_int8(struct cbuf *buf, u8 val)
-{
-	if (buf_check_size(buf, 1)) {
-		buf->p[0] = val;
-		buf->p++;
-	}
-}
-
-static void buf_put_int16(struct cbuf *buf, u16 val)
-{
-	if (buf_check_size(buf, 2)) {
-		*(__le16 *) buf->p = cpu_to_le16(val);
-		buf->p += 2;
-	}
-}
-
-static void buf_put_int32(struct cbuf *buf, u32 val)
-{
-	if (buf_check_size(buf, 4)) {
-		*(__le32 *)buf->p = cpu_to_le32(val);
-		buf->p += 4;
-	}
-}
-
-static void buf_put_int64(struct cbuf *buf, u64 val)
-{
-	if (buf_check_size(buf, 8)) {
-		*(__le64 *)buf->p = cpu_to_le64(val);
-		buf->p += 8;
-	}
-}
-
-static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
-{
-	char *ret;
-
-	ret = NULL;
-	if (buf_check_size(buf, slen + 2)) {
-		buf_put_int16(buf, slen);
-		ret = buf->p;
-		memcpy(buf->p, s, slen);
-		buf->p += slen;
-	}
-
-	return ret;
-}
-
-static u8 buf_get_int8(struct cbuf *buf)
-{
-	u8 ret = 0;
-
-	if (buf_check_size(buf, 1)) {
-		ret = buf->p[0];
-		buf->p++;
-	}
-
-	return ret;
-}
-
-static u16 buf_get_int16(struct cbuf *buf)
-{
-	u16 ret = 0;
-
-	if (buf_check_size(buf, 2)) {
-		ret = le16_to_cpu(*(__le16 *)buf->p);
-		buf->p += 2;
-	}
-
-	return ret;
-}
-
-static u32 buf_get_int32(struct cbuf *buf)
-{
-	u32 ret = 0;
-
-	if (buf_check_size(buf, 4)) {
-		ret = le32_to_cpu(*(__le32 *)buf->p);
-		buf->p += 4;
-	}
-
-	return ret;
-}
-
-static u64 buf_get_int64(struct cbuf *buf)
-{
-	u64 ret = 0;
-
-	if (buf_check_size(buf, 8)) {
-		ret = le64_to_cpu(*(__le64 *)buf->p);
-		buf->p += 8;
-	}
-
-	return ret;
-}
-
-static void buf_get_str(struct cbuf *buf, struct p9_str *vstr)
-{
-	vstr->len = buf_get_int16(buf);
-	if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-		vstr->str = buf->p;
-		buf->p += vstr->len;
-	} else {
-		vstr->len = 0;
-		vstr->str = NULL;
-	}
-}
-
-static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid)
-{
-	qid->type = buf_get_int8(bufp);
-	qid->version = buf_get_int32(bufp);
-	qid->path = buf_get_int64(bufp);
-}
-
-/**
- * p9_size_wstat - calculate the size of a variable length stat struct
- * @wstat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static int p9_size_wstat(struct p9_wstat *wstat, int dotu)
-{
-	int size = 0;
-
-	if (wstat == NULL) {
-		P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n");
-		return 0;
-	}
-
-	size =			/* 2 + *//* size[2] */
-	    2 +			/* type[2] */
-	    4 +			/* dev[4] */
-	    1 +			/* qid.type[1] */
-	    4 +			/* qid.vers[4] */
-	    8 +			/* qid.path[8] */
-	    4 +			/* mode[4] */
-	    4 +			/* atime[4] */
-	    4 +			/* mtime[4] */
-	    8 +			/* length[8] */
-	    8;			/* minimum sum of string lengths */
-
-	if (wstat->name)
-		size += strlen(wstat->name);
-	if (wstat->uid)
-		size += strlen(wstat->uid);
-	if (wstat->gid)
-		size += strlen(wstat->gid);
-	if (wstat->muid)
-		size += strlen(wstat->muid);
-
-	if (dotu) {
-		size += 4 +	/* n_uid[4] */
-		    4 +		/* n_gid[4] */
-		    4 +		/* n_muid[4] */
-		    2;		/* string length of extension[4] */
-		if (wstat->extension)
-			size += strlen(wstat->extension);
-	}
-
-	return size;
-}
-
-/**
- * buf_get_stat - safely decode a recieved metadata (stat) structure
- * @bufp: buffer to deserialize
- * @stat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static void
-buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu)
-{
-	stat->size = buf_get_int16(bufp);
-	stat->type = buf_get_int16(bufp);
-	stat->dev = buf_get_int32(bufp);
-	stat->qid.type = buf_get_int8(bufp);
-	stat->qid.version = buf_get_int32(bufp);
-	stat->qid.path = buf_get_int64(bufp);
-	stat->mode = buf_get_int32(bufp);
-	stat->atime = buf_get_int32(bufp);
-	stat->mtime = buf_get_int32(bufp);
-	stat->length = buf_get_int64(bufp);
-	buf_get_str(bufp, &stat->name);
-	buf_get_str(bufp, &stat->uid);
-	buf_get_str(bufp, &stat->gid);
-	buf_get_str(bufp, &stat->muid);
-
-	if (dotu) {
-		buf_get_str(bufp, &stat->extension);
-		stat->n_uid = buf_get_int32(bufp);
-		stat->n_gid = buf_get_int32(bufp);
-		stat->n_muid = buf_get_int32(bufp);
-	}
-}
-
-/**
- * p9_deserialize_stat - decode a received metadata structure
- * @buf: buffer to deserialize
- * @buflen: length of received buffer
- * @stat: metadata structure to decode into
- * @dotu: non-zero if 9P2000.u
- *
- * Note: stat will point to the buf region.
- */
-
-int
-p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat,
-		int dotu)
-{
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	unsigned char *p;
-
-	buf_init(bufp, buf, buflen);
-	p = bufp->p;
-	buf_get_stat(bufp, stat, dotu);
-
-	if (buf_check_overflow(bufp))
-		return 0;
-	else
-		return bufp->p - p;
-}
-EXPORT_SYMBOL(p9_deserialize_stat);
-
-/**
- * deserialize_fcall - unmarshal a response
- * @buf: recieved buffer
- * @buflen: length of received buffer
- * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-int
-p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall,
-		       int dotu)
-{
-
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	int i = 0;
-
-	buf_init(bufp, buf, buflen);
-
-	rcall->size = buf_get_int32(bufp);
-	rcall->id = buf_get_int8(bufp);
-	rcall->tag = buf_get_int16(bufp);
-
-	P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size,
-							rcall->id, rcall->tag);
-
-	switch (rcall->id) {
-	default:
-		P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id);
-		return -EPROTO;
-	case P9_RVERSION:
-		rcall->params.rversion.msize = buf_get_int32(bufp);
-		buf_get_str(bufp, &rcall->params.rversion.version);
-		break;
-	case P9_RFLUSH:
-		break;
-	case P9_RATTACH:
-		rcall->params.rattach.qid.type = buf_get_int8(bufp);
-		rcall->params.rattach.qid.version = buf_get_int32(bufp);
-		rcall->params.rattach.qid.path = buf_get_int64(bufp);
-		break;
-	case P9_RWALK:
-		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		if (rcall->params.rwalk.nwqid > P9_MAXWELEM) {
-			P9_EPRINTK(KERN_ERR,
-					"Rwalk with more than %d qids: %d\n",
-					P9_MAXWELEM, rcall->params.rwalk.nwqid);
-			return -EPROTO;
-		}
-
-		for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-			buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-		break;
-	case P9_ROPEN:
-		buf_get_qid(bufp, &rcall->params.ropen.qid);
-		rcall->params.ropen.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RCREATE:
-		buf_get_qid(bufp, &rcall->params.rcreate.qid);
-		rcall->params.rcreate.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RREAD:
-		rcall->params.rread.count = buf_get_int32(bufp);
-		rcall->params.rread.data = bufp->p;
-		buf_check_size(bufp, rcall->params.rread.count);
-		break;
-	case P9_RWRITE:
-		rcall->params.rwrite.count = buf_get_int32(bufp);
-		break;
-	case P9_RCLUNK:
-		break;
-	case P9_RREMOVE:
-		break;
-	case P9_RSTAT:
-		buf_get_int16(bufp);
-		buf_get_stat(bufp, &rcall->params.rstat.stat, dotu);
-		break;
-	case P9_RWSTAT:
-		break;
-	case P9_RERROR:
-		buf_get_str(bufp, &rcall->params.rerror.error);
-		if (dotu)
-			rcall->params.rerror.errno = buf_get_int16(bufp);
-		break;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n");
-		return -EIO;
-	}
-
-	return bufp->p - bufp->sp;
-}
-EXPORT_SYMBOL(p9_deserialize_fcall);
-
-static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p)
-{
-	*p = val;
-	buf_put_int8(bufp, val);
-}
-
-static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p)
-{
-	*p = val;
-	buf_put_int16(bufp, val);
-}
-
-static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p)
-{
-	*p = val;
-	buf_put_int32(bufp, val);
-}
-
-static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p)
-{
-	*p = val;
-	buf_put_int64(bufp, val);
-}
-
-static void
-p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str)
-{
-	int len;
-	char *s;
-
-	if (data)
-		len = strlen(data);
-	else
-		len = 0;
-
-	s = buf_put_stringn(bufp, data, len);
-	if (str) {
-		str->len = len;
-		str->str = s;
-	}
-}
-
-static int
-p9_put_data(struct cbuf *bufp, const char *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	memmove(*pdata, data, count);
-	return 0;
-}
-
-static int
-p9_put_user_data(struct cbuf *bufp, const char __user *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	return copy_from_user(*pdata, data, count);
-}
-
-static void
-p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat,
-	       struct p9_stat *stat, int statsz, int dotu)
-{
-	p9_put_int16(bufp, statsz, &stat->size);
-	p9_put_int16(bufp, wstat->type, &stat->type);
-	p9_put_int32(bufp, wstat->dev, &stat->dev);
-	p9_put_int8(bufp, wstat->qid.type, &stat->qid.type);
-	p9_put_int32(bufp, wstat->qid.version, &stat->qid.version);
-	p9_put_int64(bufp, wstat->qid.path, &stat->qid.path);
-	p9_put_int32(bufp, wstat->mode, &stat->mode);
-	p9_put_int32(bufp, wstat->atime, &stat->atime);
-	p9_put_int32(bufp, wstat->mtime, &stat->mtime);
-	p9_put_int64(bufp, wstat->length, &stat->length);
-
-	p9_put_str(bufp, wstat->name, &stat->name);
-	p9_put_str(bufp, wstat->uid, &stat->uid);
-	p9_put_str(bufp, wstat->gid, &stat->gid);
-	p9_put_str(bufp, wstat->muid, &stat->muid);
-
-	if (dotu) {
-		p9_put_str(bufp, wstat->extension, &stat->extension);
-		p9_put_int32(bufp, wstat->n_uid, &stat->n_uid);
-		p9_put_int32(bufp, wstat->n_gid, &stat->n_gid);
-		p9_put_int32(bufp, wstat->n_muid, &stat->n_muid);
-	}
-}
-
-static struct p9_fcall *
-p9_create_common(struct cbuf *bufp, u32 size, u8 id)
-{
-	struct p9_fcall *fc;
-
-	size += 4 + 1 + 2;	/* size[4] id[1] tag[2] */
-	fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL);
-	if (!fc)
-		return ERR_PTR(-ENOMEM);
-
-	fc->sdata = (char *)fc + sizeof(*fc);
-
-	buf_init(bufp, (char *)fc->sdata, size);
-	p9_put_int32(bufp, size, &fc->size);
-	p9_put_int8(bufp, id, &fc->id);
-	p9_put_int16(bufp, P9_NOTAG, &fc->tag);
-
-	return fc;
-}
-
-/**
- * p9_set_tag - set the tag field of an &p9_fcall structure
- * @fc: fcall structure to set tag within
- * @tag: tag id to set
- */
-
-void p9_set_tag(struct p9_fcall *fc, u16 tag)
-{
-	fc->tag = tag;
-	*(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
-}
-EXPORT_SYMBOL(p9_set_tag);
-
-/**
- * p9_create_tversion - allocates and creates a T_VERSION request
- * @msize: requested maximum data size
- * @version: version string to negotiate
- *
- */
-struct p9_fcall *p9_create_tversion(u32 msize, char *version)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(version);	/* msize[4] version[s] */
-	fc = p9_create_common(bufp, size, P9_TVERSION);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, msize, &fc->params.tversion.msize);
-	p9_put_str(bufp, version, &fc->params.tversion.version);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tversion);
-
-/**
- * p9_create_tauth - allocates and creates a T_AUTH request
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to authenticate
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to authneticate
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* afid[4] uname[s] aname[s] */
-	size = 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TAUTH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, afid, &fc->params.tauth.afid);
-	p9_put_str(bufp, uname, &fc->params.tauth.uname);
-	p9_put_str(bufp, aname, &fc->params.tauth.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tauth);
-
-/**
- * p9_create_tattach - allocates and creates a T_ATTACH request
- * @fid: handle to use for the new mount point
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to attach
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to attach
- * @n_uname: numeric id for user attempting to attach
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *
-p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] afid[4] uname[s] aname[s] */
-	size = 4 + 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TATTACH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tattach.fid);
-	p9_put_int32(bufp, afid, &fc->params.tattach.afid);
-	p9_put_str(bufp, uname, &fc->params.tattach.uname);
-	p9_put_str(bufp, aname, &fc->params.tattach.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname);
-
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tattach);
-
-/**
- * p9_create_tflush - allocates and creates a T_FLUSH request
- * @oldtag: tag id for the transaction we are attempting to cancel
- *
- */
-
-struct p9_fcall *p9_create_tflush(u16 oldtag)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 2;		/* oldtag[2] */
-	fc = p9_create_common(bufp, size, P9_TFLUSH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tflush);
-
-/**
- * p9_create_twalk - allocates and creates a T_FLUSH request
- * @fid: handle we are traversing from
- * @newfid: a new handle for this transaction
- * @nwname: number of path elements to traverse
- * @wnames: array of path elements
- *
- */
-
-struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
-				     char **wnames)
-{
-	int i, size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	if (nwname > P9_MAXWELEM) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM);
-		return NULL;
-	}
-
-	size = 4 + 4 + 2;	/* fid[4] newfid[4] nwname[2] ... */
-	for (i = 0; i < nwname; i++) {
-		size += 2 + strlen(wnames[i]);	/* wname[s] */
-	}
-
-	fc = p9_create_common(bufp, size, P9_TWALK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twalk.fid);
-	p9_put_int32(bufp, newfid, &fc->params.twalk.newfid);
-	p9_put_int16(bufp, nwname, &fc->params.twalk.nwname);
-	for (i = 0; i < nwname; i++) {
-		p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twalk);
-
-/**
- * p9_create_topen - allocates and creates a T_OPEN request
- * @fid: handle we are trying to open
- * @mode: what mode we are trying to open the file in
- *
- */
-
-struct p9_fcall *p9_create_topen(u32 fid, u8 mode)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 1;		/* fid[4] mode[1] */
-	fc = p9_create_common(bufp, size, P9_TOPEN);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.topen.fid);
-	p9_put_int8(bufp, mode, &fc->params.topen.mode);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_topen);
-
-/**
- * p9_create_tcreate - allocates and creates a T_CREATE request
- * @fid: handle of directory we are trying to create in
- * @name: name of the file we are trying to create
- * @perm: permissions for the file we are trying to create
- * @mode: what mode we are trying to open the file in
- * @extension: 9p2000.u extension string (for special files)
- * @dotu: 9p2000.u enabled flag
- *
- * Note: Plan 9 create semantics include opening the resulting file
- * which is why mode is included.
- */
-
-struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] name[s] perm[4] mode[1] */
-	size = 4 + 2 + strlen(name) + 4 + 1;
-	if (dotu) {
-		size += 2 +			/* extension[s] */
-		    (extension == NULL ? 0 : strlen(extension));
-	}
-
-	fc = p9_create_common(bufp, size, P9_TCREATE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tcreate.fid);
-	p9_put_str(bufp, name, &fc->params.tcreate.name);
-	p9_put_int32(bufp, perm, &fc->params.tcreate.perm);
-	p9_put_int8(bufp, mode, &fc->params.tcreate.mode);
-	if (dotu)
-		p9_put_str(bufp, extension, &fc->params.tcreate.extension);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tcreate);
-
-/**
- * p9_create_tread - allocates and creates a T_READ request
- * @fid: handle of the file we are trying to read
- * @offset: offset to start reading from
- * @count: how many bytes to read
- */
-
-struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 8 + 4;	/* fid[4] offset[8] count[4] */
-	fc = p9_create_common(bufp, size, P9_TREAD);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tread.fid);
-	p9_put_int64(bufp, offset, &fc->params.tread.offset);
-	p9_put_int32(bufp, count, &fc->params.tread.count);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tread);
-
-/**
- * p9_create_twrite - allocates and creates a T_WRITE request from the kernel
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a requst with data buffers from the kernel
- * such as the page cache.
- */
-
-struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
-				      const char *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite);
-
-/**
- * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a request with data buffers from userspace
- */
-
-struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
-				      const char __user *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite_u);
-
-/**
- * p9_create_tclunk - allocate a request to forget about a file handle
- * @fid: handle of the file we closing or forgetting about
- *
- * clunk is used both to close open files and to discard transient handles
- * which may be created during meta-data operations and hierarchy traversal.
- */
-
-struct p9_fcall *p9_create_tclunk(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TCLUNK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tclunk.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tclunk);
-
-/**
- * p9_create_tremove - allocate and create a request to remove a file
- * @fid: handle of the file or directory we are removing
- *
- */
-
-struct p9_fcall *p9_create_tremove(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TREMOVE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tremove.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tremove);
-
-/**
- * p9_create_tstat - allocate and populate a request for attributes
- * @fid: handle of the file or directory we are trying to get the attributes of
- *
- */
-
-struct p9_fcall *p9_create_tstat(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tstat.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tstat);
-
-/**
- * p9_create_tstat - allocate and populate a request to change attributes
- * @fid: handle of the file or directory we are trying to change
- * @wstat: &p9_stat structure with attributes we wish to set
- * @dotu: 9p2000.u enabled flag
- *
- */
-
-struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
-				      int dotu)
-{
-	int size, statsz;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	statsz = p9_size_wstat(wstat, dotu);
-	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
-	fc = p9_create_common(bufp, size, P9_TWSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twstat.fid);
-	buf_put_int16(bufp, statsz + 2);
-	p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twstat);
-
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c
deleted file mode 100644
index 53dd8e28dd8..00000000000
--- a/net/9p/fcprint.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- *  net/9p/fcprint.c
- *
- *  Print 9P call.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-#include <net/9p/9p.h>
-
-#ifdef CONFIG_NET_9P_DEBUG
-
-static int
-p9_printqid(char *buf, int buflen, struct p9_qid *q)
-{
-	int n;
-	char b[10];
-
-	n = 0;
-	if (q->type & P9_QTDIR)
-		b[n++] = 'd';
-	if (q->type & P9_QTAPPEND)
-		b[n++] = 'a';
-	if (q->type & P9_QTAUTH)
-		b[n++] = 'A';
-	if (q->type & P9_QTEXCL)
-		b[n++] = 'l';
-	if (q->type & P9_QTTMP)
-		b[n++] = 't';
-	if (q->type & P9_QTSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "(%.16llx %x %s)",
-					(long long int) q->path, q->version, b);
-}
-
-static int
-p9_printperm(char *buf, int buflen, int perm)
-{
-	int n;
-	char b[15];
-
-	n = 0;
-	if (perm & P9_DMDIR)
-		b[n++] = 'd';
-	if (perm & P9_DMAPPEND)
-		b[n++] = 'a';
-	if (perm & P9_DMAUTH)
-		b[n++] = 'A';
-	if (perm & P9_DMEXCL)
-		b[n++] = 'l';
-	if (perm & P9_DMTMP)
-		b[n++] = 't';
-	if (perm & P9_DMDEVICE)
-		b[n++] = 'D';
-	if (perm & P9_DMSOCKET)
-		b[n++] = 'S';
-	if (perm & P9_DMNAMEDPIPE)
-		b[n++] = 'P';
-	if (perm & P9_DMSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
-}
-
-static int
-p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended)
-{
-	int n;
-
-	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
-		st->name.str, st->uid.len, st->uid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
-
-	n += scnprintf(buf+n, buflen-n, " q ");
-	n += p9_printqid(buf+n, buflen-n, &st->qid);
-	n += scnprintf(buf+n, buflen-n, " m ");
-	n += p9_printperm(buf+n, buflen-n, st->mode);
-	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
-		st->atime, st->mtime, (long long int) st->length);
-
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
-			st->extension.len, st->extension.str);
-
-	return n;
-}
-
-static int
-p9_dumpdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	int i, n;
-
-	i = n = 0;
-	while (i < datalen) {
-		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
-		if (i%4 == 3)
-			n += scnprintf(buf + n, buflen - n, " ");
-		if (i%32 == 31)
-			n += scnprintf(buf + n, buflen - n, "\n");
-
-		i++;
-	}
-	n += scnprintf(buf + n, buflen - n, "\n");
-
-	return n;
-}
-
-static int
-p9_printdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16);
-}
-
-/**
- * p9_printfcall - decode and print a protocol structure into a buffer
- * @buf: buffer to deposit decoded structure into
- * @buflen: available space in buffer
- * @fc: protocol rpc structure of type &p9_fcall
- * @extended: whether or not session is operating with extended protocol
- */
-
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	int i, ret, type, tag;
-
-	if (!fc)
-		return scnprintf(buf, buflen, "<NULL>");
-
-	type = fc->id;
-	tag = fc->tag;
-
-	ret = 0;
-	switch (type) {
-	case P9_TVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Tversion tag %u msize %u version '%.*s'", tag,
-				fc->params.tversion.msize,
-				fc->params.tversion.version.len,
-				fc->params.tversion.version.str);
-		break;
-
-	case P9_RVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rversion tag %u msize %u version '%.*s'", tag,
-				fc->params.rversion.msize,
-				fc->params.rversion.version.len,
-				fc->params.rversion.version.str);
-		break;
-
-	case P9_TAUTH:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
-			fc->params.tauth.afid, fc->params.tauth.uname.len,
-			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
-			fc->params.tauth.aname.str);
-		break;
-
-	case P9_RAUTH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
-		break;
-
-	case P9_TATTACH:
-		ret += scnprintf(buf+ret, buflen-ret,
-		 "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag,
-		 fc->params.tattach.fid, fc->params.tattach.afid,
-		 fc->params.tattach.uname.len, fc->params.tattach.uname.str,
-		 fc->params.tattach.aname.len, fc->params.tattach.aname.str);
-		break;
-
-	case P9_RATTACH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ",
-									tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
-		break;
-
-	case P9_RERROR:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rerror tag %u ename '%.*s'", tag,
-				fc->params.rerror.error.len,
-				fc->params.rerror.error.str);
-		if (extended)
-			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
-				fc->params.rerror.errno);
-		break;
-
-	case P9_TFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
-			tag, fc->params.tflush.oldtag);
-		break;
-
-	case P9_RFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
-		break;
-
-	case P9_TWALK:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twalk tag %u fid %d newfid %d nwname %d", tag,
-			fc->params.twalk.fid, fc->params.twalk.newfid,
-			fc->params.twalk.nwname);
-		for (i = 0; i < fc->params.twalk.nwname; i++)
-			ret += scnprintf(buf+ret, buflen-ret, " '%.*s'",
-				fc->params.twalk.wnames[i].len,
-				fc->params.twalk.wnames[i].str);
-		break;
-
-	case P9_RWALK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
-			tag, fc->params.rwalk.nwqid);
-		for (i = 0; i < fc->params.rwalk.nwqid; i++)
-			ret += p9_printqid(buf+ret, buflen-ret,
-				&fc->params.rwalk.wqids[i]);
-		break;
-
-	case P9_TOPEN:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Topen tag %u fid %d mode %d", tag,
-			fc->params.topen.fid, fc->params.topen.mode);
-		break;
-
-	case P9_ROPEN:
-		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.ropen.iounit);
-		break;
-
-	case P9_TCREATE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
-			fc->params.tcreate.fid, fc->params.tcreate.name.len,
-			fc->params.tcreate.name.str);
-
-		ret += p9_printperm(buf+ret, buflen-ret,
-						fc->params.tcreate.perm);
-		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
-			fc->params.tcreate.mode);
-		break;
-
-	case P9_RCREATE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret,
-						&fc->params.rcreate.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.rcreate.iounit);
-		break;
-
-	case P9_TREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tread tag %u fid %d offset %lld count %u", tag,
-			fc->params.tread.fid,
-			(long long int) fc->params.tread.offset,
-			fc->params.tread.count);
-		break;
-
-	case P9_RREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Rread tag %u count %u data ", tag,
-			fc->params.rread.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data,
-			fc->params.rread.count);
-		break;
-
-	case P9_TWRITE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twrite tag %u fid %d offset %lld count %u data ",
-			tag, fc->params.twrite.fid,
-			(long long int) fc->params.twrite.offset,
-			fc->params.twrite.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
-			fc->params.twrite.count);
-		break;
-
-	case P9_RWRITE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
-			tag, fc->params.rwrite.count);
-		break;
-
-	case P9_TCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
-			tag, fc->params.tclunk.fid);
-		break;
-
-	case P9_RCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
-		break;
-
-	case P9_TREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
-			tag, fc->params.tremove.fid);
-		break;
-
-	case P9_RREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
-		break;
-
-	case P9_TSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
-			tag, fc->params.tstat.fid);
-		break;
-
-	case P9_RSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
-		ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
-			extended);
-		break;
-
-	case P9_TWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
-			tag, fc->params.twstat.fid);
-		ret += p9_printstat(buf+ret, buflen-ret,
-					&fc->params.twstat.stat, extended);
-		break;
-
-	case P9_RWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
-		break;
-
-	default:
-		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
-		break;
-	}
-
-	return ret;
-}
-#else
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	return 0;
-}
-#endif /* CONFIG_NET_9P_DEBUG */
-EXPORT_SYMBOL(p9_printfcall);
-
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 1084feb24cb..cf8a4128cd5 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -29,6 +29,7 @@
 #include <net/9p/9p.h>
 #include <linux/fs.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
new file mode 100644
index 00000000000..29be5243908
--- /dev/null
+++ b/net/9p/protocol.c
@@ -0,0 +1,558 @@
+/*
+ * net/9p/protocol.c
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Based on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include "protocol.h"
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef offset_of
+#define offset_of(type, memb) \
+	((unsigned long)(&((type *)0)->memb))
+#endif
+#ifndef container_of
+#define container_of(obj, type, memb) \
+	((type *)(((char *)obj) - offset_of(type, memb)))
+#endif
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+
+void
+p9pdu_dump(int way, struct p9_fcall *pdu)	/* hex-dump a PDU to the debug log */
+{
+	int i, n;
+	u8 *data = pdu->sdata;
+	int datalen = pdu->size;
+	char buf[255];
+	int buflen = 255;
+
+	i = n = 0;
+	if (datalen > (buflen-16))	/* clamp how many bytes get dumped */
+		datalen = buflen-16;
+	while (i < datalen) {
+		n += scnprintf(buf + n, buflen - n, "%02x ", data[i]);
+		if (i%4 == 3)	/* extra gap after every 4th byte */
+			n += scnprintf(buf + n, buflen - n, " ");
+		if (i%32 == 31)	/* line break after every 32nd byte */
+			n += scnprintf(buf + n, buflen - n, "\n");
+
+		i++;
+	}
+	n += scnprintf(buf + n, buflen - n, "\n");
+
+	if (way)	/* @way only selects the "[[[" or "]]]" marker */
+		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
+	else
+		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
+}
+EXPORT_SYMBOL(p9pdu_dump);
+
+void p9stat_free(struct p9_wstat *stbuf)	/* free the strings held by @stbuf */
+{
+	kfree(stbuf->name);
+	kfree(stbuf->uid);
+	kfree(stbuf->gid);
+	kfree(stbuf->muid);
+	kfree(stbuf->extension);
+}	/* NOTE(review): pointers are not reset to NULL; a second call would double-free */
+EXPORT_SYMBOL(p9stat_free);
+
+static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+{
+	size_t len = MIN(pdu->size - pdu->offset, size);	/* bytes left, capped at @size */
+	memcpy(data, &pdu->sdata[pdu->offset], len);
+	pdu->offset += len;	/* advance the read cursor */
+	return size - len;	/* shortfall: 0 only if all @size bytes were copied */
+}
+
+static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);	/* room left, capped at @size */
+	memcpy(&pdu->sdata[pdu->size], data, len);	/* append at the current end */
+	pdu->size += len;
+	return size - len;	/* shortfall: 0 only if all @size bytes fitted */
+}
+
+static size_t
+pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);
+	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
+	if (err)	/* err = number of bytes that could NOT be copied */
+		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+	len -= err;	/* count only what was really copied, so a fault is a short write */
+	pdu->size += len;
+	return size - len;	/* non-zero shortfall makes the caller report -EFAULT */
+}
+
+/*
+ * Format codes for p9pdu_vreadf(); p9pdu_vwritef() takes the same plus 'U':
+ *	b - int8_t		w - int16_t, little-endian on the wire
+ *	d - int32_t		q - int64_t, little-endian on the wire
+ *	s - string (int16_t length, then bytes); result is kmalloc'ed
+ *	S - stat
+ *	Q - qid
+ *	D - data blob (int32_t size followed by void *, results are not freed)
+ *	T - array of strings (int16_t count, followed by strings)
+ *	R - array of qids (int16_t count, followed by qids)
+ *	? - if optional = 1, continue parsing
+ * Returns 0 on success or a negative errno; parsing stops at the first error.
+ */
+
+static int
+p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t *val = va_arg(ap, int8_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+			}
+			break;
+		case 'w':{
+				int16_t *val = va_arg(ap, int16_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = le16_to_cpu(*val);	/* wire is LE */
+			}
+			break;
+		case 'd':{
+				int32_t *val = va_arg(ap, int32_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = le32_to_cpu(*val);	/* wire is LE */
+			}
+			break;
+		case 'q':{
+				int64_t *val = va_arg(ap, int64_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = le64_to_cpu(*val);	/* wire is LE */
+			}
+			break;
+		case 's':{
+				char **sptr = va_arg(ap, char **);
+				int16_t len;
+				int size;
+
+				errcode = p9pdu_readf(pdu, optional, "w", &len);
+				if (errcode)
+					break;
+
+				size = MAX(len, 0);
+
+				*sptr = kmalloc(size + 1, GFP_KERNEL);
+				if (*sptr == NULL) {
+					errcode = -ENOMEM;	/* as in 'T'/'R' */
+					break;
+				}
+				if (pdu_read(pdu, *sptr, size)) {
+					errcode = -EFAULT;
+					kfree(*sptr);
+					*sptr = NULL;
+				} else
+					(*sptr)[size] = 0;
+			}
+			break;
+		case 'Q':{
+				struct p9_qid *qid =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode = p9pdu_readf(pdu, optional, "bdq",
+						      &qid->type, &qid->version,
+						      &qid->path);
+			}
+			break;
+		case 'S':{
+				struct p9_wstat *stbuf =
+				    va_arg(ap, struct p9_wstat *);
+
+				memset(stbuf, 0, sizeof(struct p9_wstat));
+				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
+									-1;
+				errcode =
+				    p9pdu_readf(pdu, optional,
+						"wwdQdddqssss?sddd",
+						&stbuf->size, &stbuf->type,
+						&stbuf->dev, &stbuf->qid,
+						&stbuf->mode, &stbuf->atime,
+						&stbuf->mtime, &stbuf->length,
+						&stbuf->name, &stbuf->uid,
+						&stbuf->gid, &stbuf->muid,
+						&stbuf->extension,
+						&stbuf->n_uid, &stbuf->n_gid,
+						&stbuf->n_muid);
+				if (errcode)
+					p9stat_free(stbuf);
+			}
+			break;
+		case 'D':{
+				int32_t *count = va_arg(ap, int32_t *);
+				void **data = va_arg(ap, void **);
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "d", count);
+				if (!errcode) {
+					*count =
+					    MIN(*count,
+						pdu->size - pdu->offset);
+					*data = &pdu->sdata[pdu->offset];
+				}
+			}
+			break;
+		case 'T':{
+				int16_t *nwname = va_arg(ap, int16_t *);
+				char ***wnames = va_arg(ap, char ***);
+
+				*wnames = NULL;	/* defined even if count read fails */
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwname);
+				if (!errcode) {
+					*wnames = kcalloc(*nwname,
+						sizeof(char *), GFP_KERNEL);
+					if (!*wnames)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwname; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"s",
+								&(*wnames)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					if (*wnames) {
+						int i;
+						/* unread slots are NULL (kcalloc) */
+						for (i = 0; i < *nwname; i++)
+							kfree((*wnames)[i]);
+					}
+					kfree(*wnames);
+					*wnames = NULL;
+				}
+			}
+			break;
+		case 'R':{
+				int16_t *nwqid = va_arg(ap, int16_t *);
+				struct p9_qid **wqids =
+				    va_arg(ap, struct p9_qid **);
+
+				*wqids = NULL;
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					*wqids =
+					    kmalloc(*nwqid *
+						    sizeof(struct p9_qid),
+						    GFP_KERNEL);
+					if (*wqids == NULL)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwqid; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"Q",
+								&(*wqids)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					kfree(*wqids);
+					*wqids = NULL;
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+/* Encode the arguments in @ap into @pdu as directed by @fmt; multi-byte
+ * integers are stored little-endian.  Returns 0 or a negative errno. */
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t val = va_arg(ap, int);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'w':{
+				__le16 val = cpu_to_le16(va_arg(ap, int));
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'd':{
+				__le32 val = cpu_to_le32(va_arg(ap, int32_t));
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'q':{
+				__le64 val = cpu_to_le64(va_arg(ap, int64_t));
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 's':{
+				const char *sptr = va_arg(ap, const char *);
+				uint16_t len = 0;	/* must hold USHORT_MAX */
+				if (sptr)
+					len = MIN(strlen(sptr), USHORT_MAX);
+
+				errcode = p9pdu_writef(pdu, optional, "w", len);
+				if (!errcode && pdu_write(pdu, sptr, len))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'Q':{
+				const struct p9_qid *qid =
+				    va_arg(ap, const struct p9_qid *);
+				errcode = p9pdu_writef(pdu, optional, "bdq",
+						 qid->type, qid->version,
+						 qid->path);
+			} break;
+		case 'S':{
+				const struct p9_wstat *stbuf =
+				    va_arg(ap, const struct p9_wstat *);
+				errcode = p9pdu_writef(pdu, optional,
+						 "wwdQdddqssss?sddd",
+						 stbuf->size, stbuf->type,
+						 stbuf->dev, &stbuf->qid,
+						 stbuf->mode, stbuf->atime,
+						 stbuf->mtime, stbuf->length,
+						 stbuf->name, stbuf->uid,
+						 stbuf->gid, stbuf->muid,
+						 stbuf->extension, stbuf->n_uid,
+						 stbuf->n_gid, stbuf->n_muid);
+			} break;
+		case 'D':{
+				int32_t count = va_arg(ap, int32_t);
+				const void *data = va_arg(ap, const void *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write(pdu, data, count))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'U':{
+				int32_t count = va_arg(ap, int32_t);
+				const char __user *udata =
+						va_arg(ap, const void *);
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write_u(pdu, udata, count))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'T':{
+				int16_t nwname = va_arg(ap, int);
+				const char **wnames = va_arg(ap, const char **);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwname);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwname; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "s",
+								 wnames[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case 'R':{
+				int16_t nwqid = va_arg(ap, int);
+				struct p9_qid *wqids =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwqid; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "Q",
+								 &wqids[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);	/* variadic front end for p9pdu_vreadf() */
+	ret = p9pdu_vreadf(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;	/* p9pdu_vreadf()'s result, passed through unchanged */
+}
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);	/* variadic front end for p9pdu_vwritef() */
+	ret = p9pdu_vwritef(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;	/* p9pdu_vwritef()'s result, passed through unchanged */
+}
+
+int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
+{
+	struct p9_fcall fake_pdu;	/* stack PDU that only wraps @buf */
+	int ret;
+
+	fake_pdu.size = len;
+	fake_pdu.capacity = len;
+	fake_pdu.sdata = buf;
+	fake_pdu.offset = 0;	/* parse from the start of @buf */
+
+	ret = p9pdu_readf(&fake_pdu, dotu, "S", st);	/* @dotu is the "optional" flag */
+	if (ret) {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
+		p9pdu_dump(1, &fake_pdu);	/* log the bytes that failed to parse */
+	}
+
+	return ret;	/* 0, or the error from p9pdu_readf() */
+}
+EXPORT_SYMBOL(p9stat_read);
+
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
+{
+	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);	/* header: 0 for size, type, tag */
+}
+
+int p9pdu_finalize(struct p9_fcall *pdu)
+{
+	int size = pdu->size;	/* total bytes written so far */
+	int err;
+
+	pdu->size = 0;	/* rewind so the next write lands at offset 0 */
+	err = p9pdu_writef(pdu, 0, "d", size);	/* overwrite the leading size field */
+	pdu->size = size;	/* restore the real length */
+
+	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
+		p9pdu_dump(0, pdu);
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
+
+	return err;	/* result of writing the size field */
+}
+
+void p9pdu_reset(struct p9_fcall *pdu)	/* mark @pdu empty; the buffer is untouched */
+{
+	pdu->offset = 0;	/* read cursor back to the start */
+	pdu->size = 0;	/* no valid bytes */
+}
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
new file mode 100644
index 00000000000..ccde462e7ac
--- /dev/null
+++ b/net/9p/protocol.h
@@ -0,0 +1,34 @@
+/*
+ * net/9p/protocol.h
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Based on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
+int p9pdu_finalize(struct p9_fcall *pdu);
+void p9pdu_dump(int, struct p9_fcall *);
+void p9pdu_reset(struct p9_fcall *pdu);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6dabbdb6665..be65d8242fd 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -39,12 +39,11 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <net/9p/9p.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 
 #define P9_PORT 564
 #define MAX_SOCK_BUF (64*1024)
-#define ERREQFLUSH	1
-#define SCHED_TIMEOUT	10
 #define MAXPOLLWADDR	2
 
 /**
@@ -61,7 +60,6 @@ struct p9_fd_opts {
 	u16 port;
 };
 
-
 /**
  * struct p9_trans_fd - transport state
  * @rd: reference to file to read from
@@ -100,60 +98,22 @@ enum {
 	Wpending = 8,		/* can write */
 };
 
-enum {
-	None,
-	Flushing,
-	Flushed,
-};
-
-struct p9_req;
-typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
-
-/**
- * struct p9_req - fd mux encoding of an rpc transaction
- * @lock: protects req_list
- * @tag: numeric tag for rpc transaction
- * @tcall: request &p9_fcall structure
- * @rcall: response &p9_fcall structure
- * @err: error state
- * @cb: callback for when response is received
- * @cba: argument to pass to callback
- * @flush: flag to indicate RPC has been flushed
- * @req_list: list link for higher level objects to chain requests
- *
- */
-
-struct p9_req {
-	spinlock_t lock;
-	int tag;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	int err;
-	p9_conn_req_callback cb;
-	void *cba;
-	int flush;
-	struct list_head req_list;
-};
-
-struct p9_mux_poll_task {
-	struct task_struct *task;
-	struct list_head mux_list;
-	int muxnum;
+struct p9_poll_wait {
+	struct p9_conn *conn;
+	wait_queue_t wait;
+	wait_queue_head_t *wait_addr;
 };
 
 /**
  * struct p9_conn - fd mux connection state information
- * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
- * @poll_task: task polling on this connection
- * @msize: maximum size for connection (dup)
- * @extended: 9p2000.u flag (dup)
- * @trans: reference to transport instance for this connection
- * @tagpool: id accounting for transactions
+ * @client: reference to client instance for this connection
  * @err: error state
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
- * @rcall: current response &p9_fcall structure
+ * @req: current request being processed (if any)
+ * @tmp_buf: temporary buffer to read in header
+ * @rsize: amount to read for current frame
  * @rpos: read position in current frame
  * @rbuf: current read buffer
  * @wpos: write position for current frame
@@ -169,409 +129,300 @@ struct p9_mux_poll_task {
  */
 
 struct p9_conn {
-	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
-	struct p9_mux_poll_task *poll_task;
-	int msize;
-	unsigned char extended;
-	struct p9_trans *trans;
-	struct p9_idpool *tagpool;
+	struct p9_client *client;
 	int err;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
-	struct p9_fcall *rcall;
+	struct p9_req_t *req;
+	char tmp_buf[7];
+	int rsize;
 	int rpos;
 	char *rbuf;
 	int wpos;
 	int wsize;
 	char *wbuf;
-	wait_queue_t poll_wait[MAXPOLLWADDR];
-	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
+	struct list_head poll_pending_link;
+	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
 	poll_table pt;
 	struct work_struct rq;
 	struct work_struct wq;
 	unsigned long wsched;
 };
 
-/**
- * struct p9_mux_rpc - fd mux rpc accounting structure
- * @m: connection this request was issued on
- * @err: error state
- * @tcall: request &p9_fcall
- * @rcall: response &p9_fcall
- * @wqueue: wait queue that client is blocked on for this rpc
- *
- * Bug: isn't this information duplicated elsewhere like &p9_req
- */
-
-struct p9_mux_rpc {
-	struct p9_conn *m;
-	int err;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	wait_queue_head_t wqueue;
-};
-
-static int p9_poll_proc(void *);
-static void p9_read_work(struct work_struct *work);
-static void p9_write_work(struct work_struct *work);
-static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
-								poll_table *p);
-static int p9_fd_write(struct p9_trans *trans, void *v, int len);
-static int p9_fd_read(struct p9_trans *trans, void *v, int len);
-
-static DEFINE_MUTEX(p9_mux_task_lock);
+static DEFINE_SPINLOCK(p9_poll_lock);
+static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
+static struct task_struct *p9_poll_task;
 
-static int p9_mux_num;
-static int p9_mux_poll_task_num;
-static struct p9_mux_poll_task p9_mux_poll_tasks[100];
-
-static void p9_conn_destroy(struct p9_conn *);
-static unsigned int p9_fd_poll(struct p9_trans *trans,
-						struct poll_table_struct *pt);
-
-#ifdef P9_NONBLOCK
-static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-	p9_conn_req_callback cb, void *a);
-#endif /* P9_NONBLOCK */
-
-static void p9_conn_cancel(struct p9_conn *m, int err);
-
-static u16 p9_mux_get_tag(struct p9_conn *m)
+static void p9_mux_poll_stop(struct p9_conn *m)
 {
-	int tag;
+	unsigned long flags;
+	int i;
 
-	tag = p9_idpool_get(m->tagpool);
-	if (tag < 0)
-		return P9_NOTAG;
-	else
-		return (u16) tag;
-}
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		struct p9_poll_wait *pwait = &m->poll_wait[i];
 
-static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
-{
-	if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool))
-		p9_idpool_put(tag, m->tagpool);
+		if (pwait->wait_addr) {
+			remove_wait_queue(pwait->wait_addr, &pwait->wait);
+			pwait->wait_addr = NULL;
+		}
+	}
+
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	list_del_init(&m->poll_pending_link);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
 /**
- * p9_mux_calc_poll_procs - calculates the number of polling procs
- * @muxnum: number of mounts
+ * p9_conn_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
  *
- * Calculation is based on the number of mounted v9fs filesystems.
- * The current implementation returns sqrt of the number of mounts.
  */
 
-static int p9_mux_calc_poll_procs(int muxnum)
+static void p9_conn_cancel(struct p9_conn *m, int err)
 {
-	int n;
-
-	if (p9_mux_poll_task_num)
-		n = muxnum / p9_mux_poll_task_num +
-		    (muxnum % p9_mux_poll_task_num ? 1 : 0);
-	else
-		n = 1;
-
-	if (n > ARRAY_SIZE(p9_mux_poll_tasks))
-		n = ARRAY_SIZE(p9_mux_poll_tasks);
-
-	return n;
-}
+	struct p9_req_t *req, *rtmp;
+	unsigned long flags;
+	LIST_HEAD(cancel_list);
 
-static int p9_mux_poll_start(struct p9_conn *m)
-{
-	int i, n;
-	struct p9_mux_poll_task *vpt, *vptlast;
-	struct task_struct *pproc;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num,
-		p9_mux_poll_task_num);
-	mutex_lock(&p9_mux_task_lock);
-
-	n = p9_mux_calc_poll_procs(p9_mux_num + 1);
-	if (n > p9_mux_poll_task_num) {
-		for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-			if (p9_mux_poll_tasks[i].task == NULL) {
-				vpt = &p9_mux_poll_tasks[i];
-				P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n",
-									vpt);
-				pproc = kthread_create(p9_poll_proc, vpt,
-								"v9fs-poll");
-
-				if (!IS_ERR(pproc)) {
-					vpt->task = pproc;
-					INIT_LIST_HEAD(&vpt->mux_list);
-					vpt->muxnum = 0;
-					p9_mux_poll_task_num++;
-					wake_up_process(vpt->task);
-				}
-				break;
-			}
-		}
+	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
 
-		if (i >= ARRAY_SIZE(p9_mux_poll_tasks))
-			P9_DPRINTK(P9_DEBUG_ERROR,
-					"warning: no free poll slots\n");
-	}
+	spin_lock_irqsave(&m->client->lock, flags);
 
-	n = (p9_mux_num + 1) / p9_mux_poll_task_num +
-	    ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0);
-
-	vptlast = NULL;
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-		vpt = &p9_mux_poll_tasks[i];
-		if (vpt->task != NULL) {
-			vptlast = vpt;
-			if (vpt->muxnum < n) {
-				P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-				list_add(&m->mux_list, &vpt->mux_list);
-				vpt->muxnum++;
-				m->poll_task = vpt;
-				memset(&m->poll_waddr, 0,
-							sizeof(m->poll_waddr));
-				init_poll_funcptr(&m->pt, p9_pollwait);
-				break;
-			}
-		}
+	if (m->err) {
+		spin_unlock_irqrestore(&m->client->lock, flags);
+		return;
 	}
 
-	if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) {
-		if (vptlast == NULL) {
-			mutex_unlock(&p9_mux_task_lock);
-			return -ENOMEM;
-		}
+	m->err = err;
 
-		P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-		list_add(&m->mux_list, &vptlast->mux_list);
-		vptlast->muxnum++;
-		m->poll_task = vptlast;
-		memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-		init_poll_funcptr(&m->pt, p9_pollwait);
+	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
+		list_move(&req->req_list, &cancel_list);
 	}
-
-	p9_mux_num++;
-	mutex_unlock(&p9_mux_task_lock);
-
-	return 0;
-}
-
-static void p9_mux_poll_stop(struct p9_conn *m)
-{
-	int i;
-	struct p9_mux_poll_task *vpt;
-
-	mutex_lock(&p9_mux_task_lock);
-	vpt = m->poll_task;
-	list_del(&m->mux_list);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (m->poll_waddr[i] != NULL) {
-			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
-			m->poll_waddr[i] = NULL;
-		}
+	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
+		list_move(&req->req_list, &cancel_list);
 	}
-	vpt->muxnum--;
-	if (!vpt->muxnum) {
-		P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt);
-		kthread_stop(vpt->task);
-		vpt->task = NULL;
-		p9_mux_poll_task_num--;
+	spin_unlock_irqrestore(&m->client->lock, flags);
+
+	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+		list_del(&req->req_list);
+		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		p9_client_cb(m->client, req);
 	}
-	p9_mux_num--;
-	mutex_unlock(&p9_mux_task_lock);
 }
 
-/**
- * p9_conn_create - allocate and initialize the per-session mux data
- * @trans: transport structure
- *
- * Note: Creates the polling task if this is the first session.
- */
-
-static struct p9_conn *p9_conn_create(struct p9_trans *trans)
+static unsigned int
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
-	int i, n;
-	struct p9_conn *m;
+	int ret, n;
+	struct p9_trans_fd *ts = NULL;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
-								trans->msize);
-	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
-	if (!m)
-		return ERR_PTR(-ENOMEM);
+	if (client && client->status == Connected)
+		ts = client->trans;
 
-	spin_lock_init(&m->lock);
-	INIT_LIST_HEAD(&m->mux_list);
-	m->msize = trans->msize;
-	m->extended = trans->extended;
-	m->trans = trans;
-	m->tagpool = p9_idpool_create();
-	if (IS_ERR(m->tagpool)) {
-		kfree(m);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ts)
+		return -EREMOTEIO;
 
-	INIT_LIST_HEAD(&m->req_list);
-	INIT_LIST_HEAD(&m->unsent_req_list);
-	INIT_WORK(&m->rq, p9_read_work);
-	INIT_WORK(&m->wq, p9_write_work);
-	n = p9_mux_poll_start(m);
-	if (n) {
-		kfree(m);
-		return ERR_PTR(n);
-	}
+	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+		return -EIO;
 
-	n = p9_fd_poll(trans, &m->pt);
-	if (n & POLLIN) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		set_bit(Rpending, &m->wsched);
-	}
+	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+		return -EIO;
 
-	if (n & POLLOUT) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		set_bit(Wpending, &m->wsched);
-	}
+	ret = ts->rd->f_op->poll(ts->rd, pt);
+	if (ret < 0)
+		return ret;
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (IS_ERR(m->poll_waddr[i])) {
-			p9_mux_poll_stop(m);
-			kfree(m);
-			return (void *)m->poll_waddr;	/* the error code */
-		}
+	if (ts->rd != ts->wr) {
+		n = ts->wr->f_op->poll(ts->wr, pt);
+		if (n < 0)
+			return n;
+		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
 	}
 
-	return m;
+	return ret;
 }
 
 /**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
- * @m: mux to destroy
+ * p9_fd_read- read from a fd
+ * @client: client instance
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
  *
  */
 
-static void p9_conn_destroy(struct p9_conn *m)
+static int p9_fd_read(struct p9_client *client, void *v, int len)
 {
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
-		m->mux_list.prev, m->mux_list.next);
+	int ret;
+	struct p9_trans_fd *ts = NULL;
 
-	p9_mux_poll_stop(m);
-	cancel_work_sync(&m->rq);
-	cancel_work_sync(&m->wq);
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-	p9_conn_cancel(m, -ECONNRESET);
+	if (!ts)
+		return -EREMOTEIO;
 
-	m->trans = NULL;
-	p9_idpool_destroy(m->tagpool);
-	kfree(m);
+	if (!(ts->rd->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
+
+	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
- * p9_pollwait - add poll task to the wait queue
- * @filp: file pointer being polled
- * @wait_address: wait_q to block on
- * @p: poll state
+ * p9_read_work - called when there is some data to be read from a transport
+ * @work: container of work to be done
  *
- * called by files poll operation to add v9fs-poll task to files wait queue
  */
 
-static void
-p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
+static void p9_read_work(struct work_struct *work)
 {
-	int i;
+	int n, err;
 	struct p9_conn *m;
 
-	m = container_of(p, struct p9_conn, pt);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-		if (m->poll_waddr[i] == NULL)
-			break;
+	m = container_of(work, struct p9_conn, rq);
 
-	if (i >= ARRAY_SIZE(m->poll_waddr)) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
+	if (m->err < 0)
 		return;
-	}
 
-	m->poll_waddr[i] = wait_address;
+	P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
 
-	if (!wait_address) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
-		m->poll_waddr[i] = ERR_PTR(-EIO);
+	if (!m->rbuf) {
+		m->rbuf = m->tmp_buf;
+		m->rpos = 0;
+		m->rsize = 7; /* start by reading header */
+	}
+
+	clear_bit(Rpending, &m->wsched);
+	P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m,
+					m->rpos, m->rsize, m->rsize-m->rpos);
+	err = p9_fd_read(m->client, m->rbuf + m->rpos,
+						m->rsize - m->rpos);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
+	if (err == -EAGAIN) {
+		clear_bit(Rworksched, &m->wsched);
 		return;
 	}
 
-	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
-	add_wait_queue(wait_address, &m->poll_wait[i]);
-}
+	if (err <= 0)
+		goto error;
 
-/**
- * p9_poll_mux - polls a mux and schedules read or write works if necessary
- * @m: connection to poll
- *
- */
+	m->rpos += err;
 
-static void p9_poll_mux(struct p9_conn *m)
-{
-	int n;
+	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
+		u16 tag;
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n");
 
-	if (m->err < 0)
-		return;
+		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
+		if (n >= m->client->msize) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"requested packet size too big: %d\n", n);
+			err = -EIO;
+			goto error;
+		}
 
-	n = p9_fd_poll(m->trans, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		p9_conn_cancel(m, n);
-	}
+		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
+		P9_DPRINTK(P9_DEBUG_TRANS,
+			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
-	if (n & POLLIN) {
-		set_bit(Rpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+		m->req = p9_tag_lookup(m->client, tag);
+		if (!m->req) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
+								 tag);
+			err = -EIO;
+			goto error;
 		}
-	}
 
-	if (n & POLLOUT) {
-		set_bit(Wpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		if ((m->wsize || !list_empty(&m->unsent_req_list))
-		    && !test_and_set_bit(Wworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
+		if (m->req->rc == NULL) {
+			m->req->rc = kmalloc(sizeof(struct p9_fcall) +
+						m->client->msize, GFP_KERNEL);
+			if (!m->req->rc) {
+				m->req = NULL;
+				err = -ENOMEM;
+				goto error;
+			}
 		}
+		m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
+		memcpy(m->rbuf, m->tmp_buf, m->rsize);
+		m->rsize = n;
 	}
+
+	/* not an else because some packets (like clunk) have no payload */
+	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
+		spin_lock(&m->client->lock);
+		list_del(&m->req->req_list);
+		spin_unlock(&m->client->lock);
+		p9_client_cb(m->client, m->req);
+
+		m->rbuf = NULL;
+		m->rpos = 0;
+		m->rsize = 0;
+		m->req = NULL;
+	}
+
+	if (!list_empty(&m->req_list)) {
+		if (test_and_clear_bit(Rpending, &m->wsched))
+			n = POLLIN;
+		else
+			n = p9_fd_poll(m->client, NULL);
+
+		if (n & POLLIN) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+			queue_work(p9_mux_wq, &m->rq);
+		} else
+			clear_bit(Rworksched, &m->wsched);
+	} else
+		clear_bit(Rworksched, &m->wsched);
+
+	return;
+error:
+	p9_conn_cancel(m, err);
+	clear_bit(Rworksched, &m->wsched);
 }
 
 /**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
- *
- * polls all v9fs transports for new events and queues the appropriate
- * work to the work queue
+ * p9_fd_write - write to a socket
+ * @client: client instance
+ * @v: buffer to send data from
+ * @len: size of send buffer
  *
  */
 
-static int p9_poll_proc(void *a)
+static int p9_fd_write(struct p9_client *client, void *v, int len)
 {
-	struct p9_conn *m, *mtmp;
-	struct p9_mux_poll_task *vpt;
+	int ret;
+	mm_segment_t oldfs;
+	struct p9_trans_fd *ts = NULL;
 
-	vpt = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt);
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
-			p9_poll_mux(m);
-		}
+	if (!ts)
+		return -EREMOTEIO;
 
-		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
-		schedule_timeout(SCHED_TIMEOUT * HZ);
-	}
+	if (!(ts->wr->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
 
-	__set_current_state(TASK_RUNNING);
-	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
-	return 0;
+	oldfs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	set_fs(oldfs);
+
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
@@ -584,7 +435,7 @@ static void p9_write_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
-	struct p9_req *req;
+	struct p9_req_t *req;
 
 	m = container_of(work, struct p9_conn, wq);
 
@@ -599,25 +450,23 @@ static void p9_write_work(struct work_struct *work)
 			return;
 		}
 
-		spin_lock(&m->lock);
-again:
-		req = list_entry(m->unsent_req_list.next, struct p9_req,
+		spin_lock(&m->client->lock);
+		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
+		req->status = REQ_STATUS_SENT;
 		list_move_tail(&req->req_list, &m->req_list);
-		if (req->err == ERREQFLUSH)
-			goto again;
 
-		m->wbuf = req->tcall->sdata;
-		m->wsize = req->tcall->size;
+		m->wbuf = req->tc->sdata;
+		m->wsize = req->tc->size;
 		m->wpos = 0;
-		spin_unlock(&m->lock);
+		spin_unlock(&m->client->lock);
 	}
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos,
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos,
 								m->wsize);
 	clear_bit(Wpending, &m->wsched);
-	err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Wworksched, &m->wsched);
 		return;
@@ -638,10 +487,10 @@ again:
 		if (test_and_clear_bit(Wpending, &m->wsched))
 			n = POLLOUT;
 		else
-			n = p9_fd_poll(m->trans, NULL);
+			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLOUT) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			queue_work(p9_mux_wq, &m->wq);
 		} else
 			clear_bit(Wworksched, &m->wsched);
@@ -655,504 +504,197 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static void process_request(struct p9_conn *m, struct p9_req *req)
+static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-	int ecode;
-	struct p9_str *ename;
-
-	if (!req->err && req->rcall->id == P9_RERROR) {
-		ecode = req->rcall->params.rerror.errno;
-		ename = &req->rcall->params.rerror.error;
-
-		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
-								ename->str);
-
-		if (m->extended)
-			req->err = -ecode;
+	struct p9_poll_wait *pwait =
+		container_of(wait, struct p9_poll_wait, wait);
+	struct p9_conn *m = pwait->conn;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
 
-		if (!req->err) {
-			req->err = p9_errstr2errno(ename->str, ename->len);
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	if (list_empty(&m->poll_pending_link))
+		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-			/* string match failed */
-			if (!req->err) {
-				PRINT_FCALL_ERROR("unknown error", req->rcall);
-				req->err = -ESERVERFAULT;
-			}
-		}
-	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"fcall mismatch: expected %d, got %d\n",
-				req->tcall->id + 1, req->rcall->id);
-		if (!req->err)
-			req->err = -EIO;
-	}
+	/* perform the default wake up operation */
+	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
 /**
- * p9_read_work - called when there is some data to be read from a transport
- * @work: container of work to be done
+ * p9_pollwait - add poll task to the wait queue
+ * @filp: file pointer being polled
+ * @wait_address: wait_q to block on
+ * @p: poll state
  *
+ * called by a file's poll operation to add the v9fs-poll task to its wait queue
  */
 
-static void p9_read_work(struct work_struct *work)
+static void
+p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 {
-	int n, err;
-	struct p9_conn *m;
-	struct p9_req *req, *rptr, *rreq;
-	struct p9_fcall *rcall;
-	char *rbuf;
-
-	m = container_of(work, struct p9_conn, rq);
-
-	if (m->err < 0)
-		return;
-
-	rcall = NULL;
-	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+	struct p9_conn *m = container_of(p, struct p9_conn, pt);
+	struct p9_poll_wait *pwait = NULL;
+	int i;
 
-	if (!m->rcall) {
-		m->rcall =
-		    kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL);
-		if (!m->rcall) {
-			err = -ENOMEM;
-			goto error;
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (m->poll_wait[i].wait_addr == NULL) {
+			pwait = &m->poll_wait[i];
+			break;
 		}
-
-		m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-		m->rpos = 0;
 	}
 
-	clear_bit(Rpending, &m->wsched);
-	err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Rworksched, &m->wsched);
+	if (!pwait) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
 		return;
 	}
 
-	if (err <= 0)
-		goto error;
-
-	m->rpos += err;
-	while (m->rpos > 4) {
-		n = le32_to_cpu(*(__le32 *) m->rbuf);
-		if (n >= m->msize) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"requested packet size too big: %d\n", n);
-			err = -EIO;
-			goto error;
-		}
-
-		if (m->rpos < n)
-			break;
-
-		err =
-		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended);
-		if (err < 0)
-			goto error;
-
-#ifdef CONFIG_NET_9P_DEBUG
-		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-			char buf[150];
-
-			p9_printfcall(buf, sizeof(buf), m->rcall,
-				m->extended);
-			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-		}
-#endif
-
-		rcall = m->rcall;
-		rbuf = m->rbuf;
-		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize,
-					   GFP_KERNEL);
-			if (!m->rcall) {
-				err = -ENOMEM;
-				goto error;
-			}
-
-			m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-			memmove(m->rbuf, rbuf + n, m->rpos - n);
-			m->rpos -= n;
-		} else {
-			m->rcall = NULL;
-			m->rbuf = NULL;
-			m->rpos = 0;
-		}
-
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
-							rcall->id, rcall->tag);
-
-		req = NULL;
-		spin_lock(&m->lock);
-		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-			if (rreq->tag == rcall->tag) {
-				req = rreq;
-				if (req->flush != Flushing)
-					list_del(&req->req_list);
-				break;
-			}
-		}
-		spin_unlock(&m->lock);
-
-		if (req) {
-			req->rcall = rcall;
-			process_request(m, req);
-
-			if (req->flush != Flushing) {
-				if (req->cb)
-					(*req->cb) (req, req->cba);
-				else
-					kfree(req->rcall);
-			}
-		} else {
-			if (err >= 0 && rcall->id != P9_RFLUSH)
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				  "unexpected response mux %p id %d tag %d\n",
-				  m, rcall->id, rcall->tag);
-			kfree(rcall);
-		}
-	}
-
-	if (!list_empty(&m->req_list)) {
-		if (test_and_clear_bit(Rpending, &m->wsched))
-			n = POLLIN;
-		else
-			n = p9_fd_poll(m->trans, NULL);
-
-		if (n & POLLIN) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
-		} else
-			clear_bit(Rworksched, &m->wsched);
-	} else
-		clear_bit(Rworksched, &m->wsched);
-
-	return;
-
-error:
-	p9_conn_cancel(m, err);
-	clear_bit(Rworksched, &m->wsched);
+	pwait->conn = m;
+	pwait->wait_addr = wait_address;
+	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
+	add_wait_queue(wait_address, &pwait->wait);
 }
 
 /**
- * p9_send_request - send 9P request
- * The function can sleep until the request is scheduled for sending.
- * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent successfully. Can return errors
- * that can be retrieved by PTR_ERR macros.
- *
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to call when response is received
- * @cba: parameter to pass to the callback function
+ * p9_conn_create - allocate and initialize the per-session mux data
+ * @client: client instance
  *
+ * Note: The polling task is not created here; p9_trans_fd_init() starts it.
  */
 
-static struct p9_req *p9_send_request(struct p9_conn *m,
-					  struct p9_fcall *tc,
-					  p9_conn_req_callback cb, void *cba)
+static struct p9_conn *p9_conn_create(struct p9_client *client)
 {
 	int n;
-	struct p9_req *req;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-		tc, tc->id);
-	if (m->err < 0)
-		return ERR_PTR(m->err);
-
-	req = kmalloc(sizeof(struct p9_req), GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	if (tc->id == P9_TVERSION)
-		n = P9_NOTAG;
-	else
-		n = p9_mux_get_tag(m);
+	struct p9_conn *m;
 
-	if (n < 0) {
-		kfree(req);
+	P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client,
+								client->msize);
+	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
+	if (!m)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	p9_set_tag(tc, n);
+	INIT_LIST_HEAD(&m->mux_list);
+	m->client = client;
 
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
+	INIT_LIST_HEAD(&m->req_list);
+	INIT_LIST_HEAD(&m->unsent_req_list);
+	INIT_WORK(&m->rq, p9_read_work);
+	INIT_WORK(&m->wq, p9_write_work);
+	INIT_LIST_HEAD(&m->poll_pending_link);
+	init_poll_funcptr(&m->pt, p9_pollwait);
 
-		p9_printfcall(buf, sizeof(buf), tc, m->extended);
-		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
+	n = p9_fd_poll(client, &m->pt);
+	if (n & POLLIN) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		set_bit(Rpending, &m->wsched);
 	}
-#endif
-
-	spin_lock_init(&req->lock);
-	req->tag = n;
-	req->tcall = tc;
-	req->rcall = NULL;
-	req->err = 0;
-	req->cb = cb;
-	req->cba = cba;
-	req->flush = None;
-
-	spin_lock(&m->lock);
-	list_add_tail(&req->req_list, &m->unsent_req_list);
-	spin_unlock(&m->lock);
-
-	if (test_and_clear_bit(Wpending, &m->wsched))
-		n = POLLOUT;
-	else
-		n = p9_fd_poll(m->trans, NULL);
 
-	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-		queue_work(p9_mux_wq, &m->wq);
+	if (n & POLLOUT) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		set_bit(Wpending, &m->wsched);
+	}
 
-	return req;
+	return m;
 }
 
-static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
-{
-	p9_mux_put_tag(m, req->tag);
-	kfree(req);
-}
+/**
+ * p9_poll_mux - polls a mux and schedules read or write works if necessary
+ * @m: connection to poll
+ *
+ */
 
-static void p9_mux_flush_cb(struct p9_req *freq, void *a)
+static void p9_poll_mux(struct p9_conn *m)
 {
-	int tag;
-	struct p9_conn *m;
-	struct p9_req *req, *rreq, *rptr;
-
-	m = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tcall, freq->rcall, freq->err,
-		freq->tcall->params.tflush.oldtag);
-
-	spin_lock(&m->lock);
-	tag = freq->tcall->params.tflush.oldtag;
-	req = NULL;
-	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-		if (rreq->tag == tag) {
-			req = rreq;
-			list_del(&req->req_list);
-			break;
-		}
-	}
-	spin_unlock(&m->lock);
+	int n;
 
-	if (req) {
-		spin_lock(&req->lock);
-		req->flush = Flushed;
-		spin_unlock(&req->lock);
+	if (m->err < 0)
+		return;
 
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
+	n = p9_fd_poll(m->client, NULL);
+	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
+		if (n >= 0)
+			n = -ECONNRESET;
+		p9_conn_cancel(m, n);
 	}
 
-	kfree(freq->tcall);
-	kfree(freq->rcall);
-	p9_mux_free_request(m, freq);
-}
-
-static int
-p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
-{
-	struct p9_fcall *fc;
-	struct p9_req *rreq, *rptr;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
-
-	/* if a response was received for a request, do nothing */
-	spin_lock(&req->lock);
-	if (req->rcall || req->err) {
-		spin_unlock(&req->lock);
-		P9_DPRINTK(P9_DEBUG_MUX,
-			"mux %p req %p response already received\n", m, req);
-		return 0;
+	if (n & POLLIN) {
+		set_bit(Rpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		if (!test_and_set_bit(Rworksched, &m->wsched)) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+			queue_work(p9_mux_wq, &m->rq);
+		}
 	}
 
-	req->flush = Flushing;
-	spin_unlock(&req->lock);
-
-	spin_lock(&m->lock);
-	/* if the request is not sent yet, just remove it from the list */
-	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
-		if (rreq->tag == req->tag) {
-			P9_DPRINTK(P9_DEBUG_MUX,
-			   "mux %p req %p request is not sent yet\n", m, req);
-			list_del(&rreq->req_list);
-			req->flush = Flushed;
-			spin_unlock(&m->lock);
-			if (req->cb)
-				(*req->cb) (req, req->cba);
-			return 0;
+	if (n & POLLOUT) {
+		set_bit(Wpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		if ((m->wsize || !list_empty(&m->unsent_req_list))
+		    && !test_and_set_bit(Wworksched, &m->wsched)) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
+			queue_work(p9_mux_wq, &m->wq);
 		}
 	}
-	spin_unlock(&m->lock);
-
-	clear_thread_flag(TIF_SIGPENDING);
-	fc = p9_create_tflush(req->tag);
-	p9_send_request(m, fc, p9_mux_flush_cb, m);
-	return 1;
-}
-
-static void
-p9_conn_rpc_cb(struct p9_req *req, void *a)
-{
-	struct p9_mux_rpc *r;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a);
-	r = a;
-	r->rcall = req->rcall;
-	r->err = req->err;
-
-	if (req->flush != None && !req->err)
-		r->err = -ERESTARTSYS;
-
-	wake_up(&r->wqueue);
 }
 
 /**
- * p9_fd_rpc- sends 9P request and waits until a response is available.
- *	The function can be interrupted.
- * @t: transport data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
+ * p9_fd_request - send 9P request
+ * The function can sleep until the request is scheduled for sending.
+ * The function can be interrupted. Return from the function is not
+ * a guarantee that the request is sent successfully.
+ *
+ * @client: client instance
+ * @req: request to be sent
  *
  */
 
-int
-p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
-	struct p9_trans_fd *p = t->priv;
-	struct p9_conn *m = p->conn;
-	int err, sigpending;
-	unsigned long flags;
-	struct p9_req *req;
-	struct p9_mux_rpc r;
-
-	r.err = 0;
-	r.tcall = tc;
-	r.rcall = NULL;
-	r.m = m;
-	init_waitqueue_head(&r.wqueue);
-
-	if (rc)
-		*rc = NULL;
-
-	sigpending = 0;
-	if (signal_pending(current)) {
-		sigpending = 1;
-		clear_thread_flag(TIF_SIGPENDING);
-	}
-
-	req = p9_send_request(m, tc, p9_conn_rpc_cb, &r);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return err;
-	}
+	int n;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
 
-	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
-	if (r.err < 0)
-		err = r.err;
-
-	if (err == -ERESTARTSYS && m->trans->status == Connected
-							&& m->err == 0) {
-		if (p9_mux_flush_request(m, req)) {
-			/* wait until we get response of the flush message */
-			do {
-				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(r.wqueue,
-					r.rcall || r.err);
-			} while (!r.rcall && !r.err && err == -ERESTARTSYS &&
-				m->trans->status == Connected && !m->err);
-
-			err = -ERESTARTSYS;
-		}
-		sigpending = 1;
-	}
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m,
+						current, req->tc, req->tc->id);
+	if (m->err < 0)
+		return m->err;
 
-	if (sigpending) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-	}
+	spin_lock(&client->lock);
+	req->status = REQ_STATUS_UNSENT;
+	list_add_tail(&req->req_list, &m->unsent_req_list);
+	spin_unlock(&client->lock);
 
-	if (rc)
-		*rc = r.rcall;
+	if (test_and_clear_bit(Wpending, &m->wsched))
+		n = POLLOUT;
 	else
-		kfree(r.rcall);
+		n = p9_fd_poll(m->client, NULL);
 
-	p9_mux_free_request(m, req);
-	if (err > 0)
-		err = -EIO;
+	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+		queue_work(p9_mux_wq, &m->wq);
 
-	return err;
+	return 0;
 }
 
-#ifdef P9_NONBLOCK
-/**
- * p9_conn_rpcnb - sends 9P request without waiting for response.
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to be called when response arrives
- * @a: value to pass to the callback function
- *
- */
-
-int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-		   p9_conn_req_callback cb, void *a)
+static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
-	int err;
-	struct p9_req *req;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
+	int ret = 1;
 
-	req = p9_send_request(m, tc, cb, a);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return PTR_ERR(req);
-	}
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
-	return 0;
-}
-#endif /* P9_NONBLOCK */
+	spin_lock(&client->lock);
+	list_del(&req->req_list);
 
-/**
- * p9_conn_cancel - cancel all pending requests with error
- * @m: mux data
- * @err: error code
- *
- */
-
-void p9_conn_cancel(struct p9_conn *m, int err)
-{
-	struct p9_req *req, *rtmp;
-	LIST_HEAD(cancel_list);
-
-	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
-	m->err = err;
-	spin_lock(&m->lock);
-	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
+	if (req->status == REQ_STATUS_UNSENT) {
+		req->status = REQ_STATUS_FLSHD;
+		ret = 0;
 	}
-	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	spin_unlock(&m->lock);
 
-	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
-		if (!req->err)
-			req->err = err;
+	spin_unlock(&client->lock);
 
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-	}
+	return ret;
 }
 
 /**
@@ -1216,7 +758,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	return 0;
 }
 
-static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
+static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 {
 	struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
 					   GFP_KERNEL);
@@ -1234,13 +776,13 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
 		return -EIO;
 	}
 
-	trans->priv = ts;
-	trans->status = Connected;
+	client->trans = ts;
+	client->status = Connected;
 
 	return 0;
 }
 
-static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
+static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 {
 	int fd, ret;
 
@@ -1251,137 +793,65 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 		return fd;
 	}
 
-	ret = p9_fd_open(trans, fd, fd);
+	ret = p9_fd_open(client, fd, fd);
 	if (ret < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n");
 		sockfd_put(csocket);
 		return ret;
 	}
 
-	((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK;
+	((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK;
 
 	return 0;
 }
 
 /**
- * p9_fd_read- read from a fd
- * @trans: transport instance state
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-
-static int p9_fd_read(struct p9_trans *trans, void *v, int len)
-{
-	int ret;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->rd->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-/**
- * p9_fd_write - write to a socket
- * @trans: transport instance state
- * @v: buffer to send data from
- * @len: size of send buffer
+ * p9_conn_destroy - cancels all pending requests and frees mux resources
+ * @m: mux to destroy
  *
  */
 
-static int p9_fd_write(struct p9_trans *trans, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->wr->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
-	set_fs(oldfs);
-
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-static unsigned int
-p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
+static void p9_conn_destroy(struct p9_conn *m)
 {
-	int ret, n;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status == Connected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
-		return -EIO;
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m,
+		m->mux_list.prev, m->mux_list.next);
 
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		return ret;
+	p9_mux_poll_stop(m);
+	cancel_work_sync(&m->rq);
+	cancel_work_sync(&m->wq);
 
-	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0)
-			return n;
-		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
-	}
+	p9_conn_cancel(m, -ECONNRESET);
 
-	return ret;
+	m->client = NULL;
+	kfree(m);
 }
 
 /**
- * p9_fd_close - shutdown socket
- * @trans: private socket structure
+ * p9_fd_close - shutdown file descriptor transport
+ * @client: client instance
  *
  */
 
-static void p9_fd_close(struct p9_trans *trans)
+static void p9_fd_close(struct p9_client *client)
 {
 	struct p9_trans_fd *ts;
 
-	if (!trans)
+	if (!client)
 		return;
 
-	ts = xchg(&trans->priv, NULL);
-
+	ts = client->trans;
 	if (!ts)
 		return;
 
+	client->status = Disconnected;
+
 	p9_conn_destroy(ts->conn);
 
-	trans->status = Disconnected;
 	if (ts->rd)
 		fput(ts->rd);
 	if (ts->wr)
 		fput(ts->wr);
+
 	kfree(ts);
 }
 
@@ -1402,31 +872,23 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
-static struct p9_trans *
-p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
+static int
+p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct socket *csocket;
 	struct sockaddr_in sin_server;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	err = parse_opts(args, &opts);
 	if (err < 0)
-		return ERR_PTR(err);
+		return err;
 
 	if (valid_ipaddr4(addr) < 0)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-	trans->msize = msize;
-	trans->extended = dotu;
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
@@ -1449,45 +911,38 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_unix(const char *addr, char *args, int msize,
-							unsigned char dotu)
+static int
+p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
 	struct socket *csocket;
 	struct sockaddr_un sun_server;
-	struct p9_trans *trans;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	if (strlen(addr) > UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
@@ -1508,79 +963,69 @@ p9_trans_create_unix(const char *addr, char *args, int msize,
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = dotu;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_fd(const char *name, char *args, int msize,
-							unsigned char extended)
+static int
+p9_fd_create(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	parse_opts(args, &opts);
 
 	if (opts.rfd == ~0 || opts.wfd == ~0) {
 		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return ERR_PTR(-ENOPROTOOPT);
+		return -ENOPROTOOPT;
 	}
 
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
-
-	err = p9_fd_open(trans, opts.rfd, opts.wfd);
+	err = p9_fd_open(client, opts.rfd, opts.wfd);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = extended;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
 static struct p9_trans_module p9_tcp_trans = {
 	.name = "tcp",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 1,
-	.create = p9_trans_create_tcp,
+	.create = p9_fd_create_tcp,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1588,7 +1033,10 @@ static struct p9_trans_module p9_unix_trans = {
 	.name = "unix",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_unix,
+	.create = p9_fd_create_unix,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1596,23 +1044,71 @@ static struct p9_trans_module p9_fd_trans = {
 	.name = "fd",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_fd,
+	.create = p9_fd_create,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
-int p9_trans_fd_init(void)
+/**
+ * p9_poll_proc - poll worker thread
+ * @a: thread state and arguments
+ *
+ * polls all v9fs transports for new events and queues the appropriate
+ * work to the work queue
+ *
+ */
+
+static int p9_poll_proc(void *a)
 {
-	int i;
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
+ repeat:
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	while (!list_empty(&p9_poll_pending_list)) {
+		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
+							struct p9_conn,
+							poll_pending_link);
+		list_del_init(&conn->poll_pending_link);
+		spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+		p9_poll_mux(conn);
+
+		spin_lock_irqsave(&p9_poll_lock, flags);
+	}
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (list_empty(&p9_poll_pending_list)) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (!kthread_should_stop())
+		goto repeat;
 
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
-		p9_mux_poll_tasks[i].task = NULL;
+	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
+	return 0;
+}
 
+int p9_trans_fd_init(void)
+{
 	p9_mux_wq = create_workqueue("v9fs");
 	if (!p9_mux_wq) {
 		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
 		return -ENOMEM;
 	}
 
+	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
+	if (IS_ERR(p9_poll_task)) {
+		destroy_workqueue(p9_mux_wq);
+		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
+		return PTR_ERR(p9_poll_task);
+	}
+
 	v9fs_register_trans(&p9_tcp_trans);
 	v9fs_register_trans(&p9_unix_trans);
 	v9fs_register_trans(&p9_fd_trans);
@@ -1622,6 +1118,7 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
+	kthread_stop(p9_poll_task);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 94912e077a5..2d7781ec663 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -1,12 +1,10 @@
 /*
- * The Guest 9p transport driver
+ * The Virtio 9p transport driver
  *
  * This is a block based transport driver based on the lguest block driver
  * code.
  *
- */
-/*
- *  Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation
+ *  Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
  *
  *  Based on virtio console driver
  *  Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
@@ -41,6 +39,7 @@
 #include <linux/file.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
@@ -53,50 +52,6 @@ static DEFINE_MUTEX(virtio_9p_lock);
 /* global which tracks highest initialized channel */
 static int chan_index;
 
-#define P9_INIT_MAXTAG	16
-
-
-/**
- * enum p9_req_status_t - virtio request status
- * @REQ_STATUS_IDLE: request slot unused
- * @REQ_STATUS_SENT: request sent to server
- * @REQ_STATUS_RCVD: response received from server
- * @REQ_STATUS_FLSH: request has been flushed
- *
- * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
- * but use is actually tracked by the idpool structure which handles tag
- * id allocation.
- *
- */
-
-enum p9_req_status_t {
-	REQ_STATUS_IDLE,
-	REQ_STATUS_SENT,
-	REQ_STATUS_RCVD,
-	REQ_STATUS_FLSH,
-};
-
-/**
- * struct p9_req_t - virtio request slots
- * @status: status of this request slot
- * @wq: wait_queue for the client to block on for this request
- *
- * The virtio transport uses an array to track outstanding requests
- * instead of a list.  While this may incurr overhead during initial
- * allocation or expansion, it makes request lookup much easier as the
- * tag id is a index into an array.  (We use tag+1 so that we can accomodate
- * the -1 tag for the T_VERSION request).
- * This also has the nice effect of only having to allocate wait_queues
- * once, instead of constantly allocating and freeing them.  Its possible
- * other resources could benefit from this scheme as well.
- *
- */
-
-struct p9_req_t {
-	int status;
-	wait_queue_head_t *wq;
-};
-
 /**
  * struct virtio_chan - per-instance transport information
  * @initialized: whether the channel is initialized
@@ -121,67 +76,14 @@ static struct virtio_chan {
 
 	spinlock_t lock;
 
+	struct p9_client *client;
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
 
-	struct p9_idpool *tagpool;
-	struct p9_req_t *reqs;
-	int max_tag;
-
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 } channels[MAX_9P_CHAN];
 
-/**
- * p9_lookup_tag - Lookup requests by tag
- * @c: virtio channel to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accomodate transaction
- * ids which did not previously have a slot.
- *
- * Bugs: there is currently no upper limit on request slots set
- * here, but that should be constrained by the id accounting.
- */
-
-static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag)
-{
-	/* This looks up the original request by tag so we know which
-	 * buffer to read the data into */
-	tag++;
-
-	while (tag >= c->max_tag) {
-		int old_max = c->max_tag;
-		int count;
-
-		if (c->max_tag)
-			c->max_tag *= 2;
-		else
-			c->max_tag = P9_INIT_MAXTAG;
-
-		c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag,
-								GFP_ATOMIC);
-		if (!c->reqs) {
-			printk(KERN_ERR "Couldn't grow tag array\n");
-			BUG();
-		}
-		for (count = old_max; count < c->max_tag; count++) {
-			c->reqs[count].status = REQ_STATUS_IDLE;
-			c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t),
-								GFP_ATOMIC);
-			if (!c->reqs[count].wq) {
-				printk(KERN_ERR "Couldn't grow tag array\n");
-				BUG();
-			}
-			init_waitqueue_head(c->reqs[count].wq);
-		}
-	}
-
-	return &c->reqs[tag];
-}
-
-
 /* How many bytes left in this page. */
 static unsigned int rest_of_page(void *data)
 {
@@ -197,25 +99,13 @@ static unsigned int rest_of_page(void *data)
  *
  */
 
-static void p9_virtio_close(struct p9_trans *trans)
+static void p9_virtio_close(struct p9_client *client)
 {
-	struct virtio_chan *chan = trans->priv;
-	int count;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->lock, flags);
-	p9_idpool_destroy(chan->tagpool);
-	for (count = 0; count < chan->max_tag; count++)
-		kfree(chan->reqs[count].wq);
-	kfree(chan->reqs);
-	chan->max_tag = 0;
-	spin_unlock_irqrestore(&chan->lock, flags);
+	struct virtio_chan *chan = client->trans;
 
 	mutex_lock(&virtio_9p_lock);
 	chan->inuse = false;
 	mutex_unlock(&virtio_9p_lock);
-
-	kfree(trans);
 }
 
 /**
@@ -236,17 +126,16 @@ static void req_done(struct virtqueue *vq)
 	struct virtio_chan *chan = vq->vdev->priv;
 	struct p9_fcall *rc;
 	unsigned int len;
-	unsigned long flags;
 	struct p9_req_t *req;
 
-	spin_lock_irqsave(&chan->lock, flags);
+	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
+
 	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
-		req = p9_lookup_tag(chan, rc->tag);
-		req->status = REQ_STATUS_RCVD;
-		wake_up(req->wq);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+		req = p9_tag_lookup(chan->client, rc->tag);
+		p9_client_cb(chan->client, req);
 	}
-	/* In case queue is stopped waiting for more buffers. */
-	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
@@ -283,8 +172,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 	return index-start;
 }
 
+/* We don't currently allow canceling of virtio requests */
+static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+	return 1;
+}
+
 /**
- * p9_virtio_rpc - issue a request and wait for a response
+ * p9_virtio_request - issue a request
  * @t: transport state
  * @tc: &p9_fcall request to transmit
  * @rc: &p9_fcall to put reponse into
@@ -292,44 +187,22 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  */
 
 static int
-p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int in, out;
-	int n, err, size;
-	struct virtio_chan *chan = t->priv;
-	char *rdata;
-	struct p9_req_t *req;
-	unsigned long flags;
-
-	if (*rc == NULL) {
-		*rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL);
-		if (!*rc)
-			return -ENOMEM;
-	}
-
-	rdata = (char *)*rc+sizeof(struct p9_fcall);
-
-	n = P9_NOTAG;
-	if (tc->id != P9_TVERSION) {
-		n = p9_idpool_get(chan->tagpool);
-		if (n < 0)
-			return -ENOMEM;
-	}
-
-	spin_lock_irqsave(&chan->lock, flags);
-	req = p9_lookup_tag(chan, n);
-	spin_unlock_irqrestore(&chan->lock, flags);
+	struct virtio_chan *chan = client->trans;
+	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 
-	p9_set_tag(tc, n);
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n);
-
-	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize);
+	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
+								req->tc->size);
+	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
+								client->msize);
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) {
+	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
@@ -337,31 +210,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 
 	chan->vq->vq_ops->kick(chan->vq);
 
-	wait_event(*req->wq, req->status == REQ_STATUS_RCVD);
-
-	size = le32_to_cpu(*(__le32 *) rdata);
-
-	err = p9_deserialize_fcall(rdata, size, *rc, t->extended);
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"9p debug: virtio rpc deserialize returned %d\n", err);
-		return err;
-	}
-
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
-
-		p9_printfcall(buf, sizeof(buf), *rc, t->extended);
-		printk(KERN_NOTICE ">>> %p %s\n", t, buf);
-	}
-#endif
-
-	if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool))
-		p9_idpool_put(n, chan->tagpool);
-
-	req->status = REQ_STATUS_IDLE;
-
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
 }
 
@@ -422,10 +271,9 @@ fail:
 
 /**
  * p9_virtio_create - allocate a new virtio channel
+ * @client: client instance invoking this transport
  * @devname: string identifying the channel to connect to (unused)
  * @args: args passed from sys_mount() for per-transport options (unused)
- * @msize: requested maximum packet size
- * @extended: 9p2000.u enabled flag
  *
  * This sets up a transport channel for 9p communication.  Right now
  * we only match the first available channel, but eventually we couldlook up
@@ -441,11 +289,9 @@ fail:
  *
  */
 
-static struct p9_trans *
-p9_virtio_create(const char *devname, char *args, int msize,
-							unsigned char extended)
+static int
+p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 {
-	struct p9_trans *trans;
 	struct virtio_chan *chan = channels;
 	int index = 0;
 
@@ -463,30 +309,13 @@ p9_virtio_create(const char *devname, char *args, int msize,
 
 	if (index >= MAX_9P_CHAN) {
 		printk(KERN_ERR "9p: no channels available\n");
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 	}
 
-	chan->tagpool = p9_idpool_create();
-	if (IS_ERR(chan->tagpool)) {
-		printk(KERN_ERR "9p: couldn't allocate tagpool\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	p9_idpool_get(chan->tagpool); /* reserve tag 0 */
-	chan->max_tag = 0;
-	chan->reqs = NULL;
-
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans) {
-		printk(KERN_ERR "9p: couldn't allocate transport\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	trans->extended = extended;
-	trans->msize = msize;
-	trans->close = p9_virtio_close;
-	trans->rpc = p9_virtio_rpc;
-	trans->priv = chan;
+	client->trans = (void *)chan;
+	chan->client = client;
 
-	return trans;
+	return 0;
 }
 
 /**
@@ -526,6 +355,9 @@ static struct virtio_driver p9_virtio_drv = {
 static struct p9_trans_module p9_virtio_trans = {
 	.name = "virtio",
 	.create = p9_virtio_create,
+	.close = p9_virtio_close,
+	.request = p9_virtio_request,
+	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
 	.owner = THIS_MODULE,
diff --git a/net/9p/util.c b/net/9p/util.c
index 958fc58cd1f..dc4ec05ad93 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -105,6 +105,7 @@ retry:
 	else if (error)
 		return -1;
 
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
 	return i;
 }
 EXPORT_SYMBOL(p9_idpool_get);
@@ -121,6 +122,9 @@ EXPORT_SYMBOL(p9_idpool_get);
 void p9_idpool_put(int id, struct p9_idpool *p)
 {
 	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
+
 	spin_lock_irqsave(&p->lock, flags);
 	idr_remove(&p->pool, id);
 	spin_unlock_irqrestore(&p->lock, flags);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a4abed5b4c4..fa5cda4e552 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
-	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
 		(struct net_device *)out, br_nf_forward_finish);
 
 	return NF_STOLEN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 868ec0ba8b7..b8a4fd0806a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	err = device_rename(&dev->dev, dev->name);
-	if (err) {
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		return err;
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 11062780bb0..d4ce1224e00 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8..e6bf99e3e41 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
 	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3d..809d803d500 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_sk(sk)->sport;
+	dh->dccph_sport	= inet_rsk(req)->loc_port;
 	dh->dccph_dport	= inet_rsk(req)->rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b043eda60b0..1a9dd66511f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -663,7 +663,7 @@ out:
 void arp_xmit(struct sk_buff *skb)
 {
 	/* Send it off, maybe filter it using firewalling first.  */
-	NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
 
 /*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
-	return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
 
 freeskb:
 	kfree_skb(skb);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ffeaffc3fff..8303e4b406c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			*obj = kmalloc(sizeof(struct snmp_object) + len,
 				       GFP_ATOMIC);
 			if (*obj == NULL) {
+				kfree(p);
 				kfree(id);
 				if (net_ratelimit())
 					printk("OOM in bsalg (%d)\n", __LINE__);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ec394cf5a19..676c80b5b14 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->mss = mss;
 	ireq->rmt_port = th->source;
+	ireq->loc_port = th->dest;
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5310c9b84d..b6b356b7912 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_sk(sk)->sport),
+		   ntohs(inet_rsk(req)->loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
 		   ntohs(inet_rsk(req)->rmt_port),
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 78892cf2b02..25dcef9f219 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
 	select NETFILTER_NETLINK
-	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 05048e40326..79a69805221 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -25,11 +25,13 @@ menuconfig IP_VS
 if IP_VS
 
 config	IP_VS_IPV6
-	bool "IPv6 support for IPVS (DANGEROUS)"
+	bool "IPv6 support for IPVS"
 	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
 	---help---
 	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
 
+	  See http://www.mindbasket.com/ipvs for more information.
+
 	  Say N if unsure.
 
 config	IP_VS_DEBUG
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2e4ad9671e1..a040d46f85d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -813,6 +813,7 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
@@ -840,6 +841,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 
 	return parse_nat_setup(ct, manip, attr);
 }
+#endif
 
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 2cc1fff4930..f9977b3311f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= NF_ARP,
+		.family		= NFPROTO_ARP,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 6f62c36948d..7ac54eab0b0 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
 		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m) {
 			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_DST) {
 		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
 		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m) {
 			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
 	}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4ebd4ca9a99..280c471bcdf 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
-		  &recent_mt_fops);
+	t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops, t);
 	if (t->proc == NULL) {
 		kfree(t);
 		goto out;
 	}
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
-		      &recent_old_fops);
+	t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops, t);
 	if (t->proc_old == NULL) {
 		remove_proc_entry(t->name, proc_old_dir);
 		kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	}
 	t->proc_old->uid   = ip_list_uid;
 	t->proc_old->gid   = ip_list_gid;
-	t->proc_old->data  = t;
 #endif
 	t->proc->uid       = ip_list_uid;
 	t->proc->gid       = ip_list_gid;
-	t->proc->data      = t;
 #endif
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &tables);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7b5572d6beb..93cd30ce650 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 
 static struct netdev_queue noop_netdev_queue = {
 	.qdisc		=	&noop_qdisc,
+	.qdisc_sleeping	=	&noop_qdisc,
 };
 
 struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 static struct Qdisc noqueue_qdisc;
 static struct netdev_queue noqueue_netdev_queue = {
 	.qdisc		=	&noqueue_qdisc,
+	.qdisc_sleeping	=	&noqueue_qdisc,
 };
 
 static struct Qdisc noqueue_qdisc = {
diff --git a/samples/Kconfig b/samples/Kconfig
index e1fb471cc50..4b02f5a0e65 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
 	help
 	  This build markers example modules.
 
+config SAMPLE_TRACEPOINTS
+	tristate "Build tracepoints examples -- loadable modules only"
+	depends on TRACEPOINTS && m
+	help
+	  This builds the tracepoints example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 2e02575f779..10eaca89fe1 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index c8e099d4d1f..2dfb3b32937 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -81,6 +81,7 @@ static void __exit probe_fini(void)
 			probe_array[i].probe_func, &probe_array[i]);
 	printk(KERN_INFO "Number of event b : %u\n",
 			atomic_read(&eventb_count));
+	marker_synchronize_unregister();
 }
 
 module_init(probe_init);
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644
index 00000000000..36479ad9ae1
--- /dev/null
+++ b/samples/tracepoints/Makefile
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644
index 00000000000..0216b55bd64
--- /dev/null
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -0,0 +1,13 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>	/* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+	TPPROTO(struct inode *inode, struct file *file),
+	TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+	TPPROTO(void),
+	TPARGS());
+#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644
index 00000000000..55abfdda4bd
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -0,0 +1,55 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	path_get(&file->f_path);
+	dget(file->f_path.dentry);
+	printk(KERN_INFO "Event is encountered with filename %s\n",
+		file->f_path.dentry->d_name.name);
+	dput(file->f_path.dentry);
+	path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+	printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+	ret = register_trace_subsys_eventb(probe_subsys_eventb);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_eventb(probe_subsys_eventb);
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644
index 00000000000..5e9fcf4afff
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -0,0 +1,42 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * This probe only reads inode->i_ino, so it takes no additional references.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	printk(KERN_INFO "Event is encountered with inode number %lu\n",
+		inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644
index 00000000000..4ae4b7fcc04
--- /dev/null
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -0,0 +1,53 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_subsys_event(inode, file);
+	for (i = 0; i < 10; i++)
+		trace_subsys_eventb();
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = proc_create("tracepoint-example", 0444, NULL,
+		&mark_ops);
+	if (!pentry_example)
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 277cfe0b710..5ed4cbf1e0e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,10 +198,17 @@ cmd_modversions =							\
 	fi;
 endif
 
+ifdef CONFIG_FTRACE_MCOUNT_RECORD
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
+	"$(ARCH)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" \
+	"$(MV)" "$(@)";
+endif
+
 define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call echo-cmd,cc_o_c) $(cmd_cc_o_c);				  \
 	$(cmd_modversions)						  \
+	$(cmd_record_mcount)						  \
 	scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' >    \
 	                                              $(dot-target).tmp;  \
 	rm -f $(depfile);						  \
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index 7304e19782c..bf8b199ec59 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1,3 +1,3 @@
+hash
 fixdep
-split-include
 docproc
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 2243353fe55..5e7316e5aa3 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -37,13 +37,13 @@
 # 	dmesg | perl scripts/bootgraph.pl > output.svg
 #
 
-my @rows;
-my %start, %end, %row;
+my %start, %end;
 my $done = 0;
-my $rowcount = 0;
 my $maxtime = 0;
 my $firsttime = 100;
 my $count = 0;
+my %pids;
+
 while (<>) {
 	my $line = $_;
 	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z0-9\_]+)\+/) {
@@ -54,14 +54,8 @@ while (<>) {
 				$firsttime = $1;
 			}
 		}
-		$row{$func} = 1;
 		if ($line =~ /\@ ([0-9]+)/) {
-			my $pid = $1;
-			if (!defined($rows[$pid])) {
-				$rowcount = $rowcount + 1;
-				$rows[$pid] = $rowcount;
-			}
-			$row{$func} = $rows[$pid];
+			$pids{$func} = $1;
 		}
 		$count = $count + 1;
 	}
@@ -109,17 +103,25 @@ $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(
 my $mult = 950.0 / ($maxtime - $firsttime);
 my $threshold = ($maxtime - $firsttime) / 60.0;
 my $stylecounter = 0;
+my %rows;
+my $rowscount = 1;
 while (($key,$value) = each %start) {
 	my $duration = $end{$key} - $start{$key};
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
+		$pid = $pids{$key};
+
+		if (!defined($rows{$pid})) {
+			$rows{$pid} = $rowscount;
+			$rowscount = $rowscount + 1;
+		}
 		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
 		$e = ($end{$key} - $firsttime) * $mult;
 		$w = $e - $s;
 
-		$y = $row{$key} * 150;
+		$y = $rows{$pid} * 150;
 		$y2 = $y + 4;
 
 		$style = $styles[$stylecounter];
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e30bac141b2..f88bb3e21cd 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# (c) 2001, Dave Jones. <davej@codemonkey.org.uk> (the file handling bit)
+# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
 # (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
 # Licensed under the terms of the GNU GPL License version 2
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
new file mode 100755
index 00000000000..f56d760bd58
--- /dev/null
+++ b/scripts/recordmcount.pl
@@ -0,0 +1,395 @@
+#!/usr/bin/perl -w
+# (c) 2008, Steven Rostedt <srostedt@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# recordmcount.pl - makes a section called __mcount_loc that holds
+#                   all the offsets to the calls to mcount.
+#
+#
+# What we want to end up with is a section in vmlinux called
+# __mcount_loc that contains a list of pointers to all the
+# call sites in the kernel that call mcount. Later on boot up, the kernel
+# will read this list, save the locations and turn them into nops.
+# When tracing or profiling is later enabled, these locations will then
+# be converted back to pointers to some function.
+#
+# This is no easy feat. This script is called just after the original
+# object is compiled and before it is linked.
+#
+# The references to the call sites are offsets from the section of text
+# that the call site is in. Hence, all functions in a section that
+# has a call site to mcount, will have the offset from the beginning of
+# the section and not the beginning of the function.
+#
+# The trick is to find a way to record the beginning of the section.
+# The way we do this is to look at the first function in the section
+# which will also be the location of that section after final link.
+# e.g.
+#
+#  .section ".text.sched"
+#  .globl my_func
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# Both relocation offsets for the mcounts in the above example will be
+# offset from .text.sched. If we make another file called tmp.s with:
+#
+#  .section __mcount_loc
+#  .quad  my_func + 0x5
+#  .quad  my_func + 0x1b
+#
+# We can then compile this tmp.s into tmp.o, and link it to the original
+# object.
+#
+# But this gets hard if my_func is not globl (a static function).
+# In such a case we have:
+#
+#  .section ".text.sched"
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  .globl other_func
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# If we make the tmp.s the same as above, when we link together with
+# the original object, we will end up with two symbols for my_func:
+# one local, one global.  After final compile, we will end up with
+# an undefined reference to my_func.
+#
+# Since local objects can reference local variables, we need to find
+# a way to make tmp.o reference the local objects of the original object
+# file after it is linked together. To do this, we convert the my_func
+# into a global symbol before linking tmp.o. Then after we link tmp.o
+# we will only have a single symbol for my_func that is global.
+# We can convert my_func back into a local symbol and we are done.
+#
+# Here are the steps we take:
+#
+# 1) Record all the local symbols by using 'nm'
+# 2) Use objdump to find all the call site offsets and sections for
+#    mcount.
+# 3) Compile the list into its own object.
+# 4) Do we have to deal with local functions? If not, go to step 8.
+# 5) Make an object that converts these local functions to global symbols
+#    with objcopy.
+# 6) Link together this new object with the list object.
+# 7) Convert the local functions back to local symbols and rename
+#    the result as the original object.
+#    End.
+# 8) Link the object with the list object.
+# 9) Move the result back to the original object.
+#    End.
+#
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.1';
+
+if ($#ARGV < 8) {	# all nine arguments named in the usage line are required
+	print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+	print "version: $V\n";
+	exit(1);
+}
+
+my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+
+$objdump = "objdump" if ((length $objdump) == 0);
+$objcopy = "objcopy" if ((length $objcopy) == 0);
+$cc = "gcc" if ((length $cc) == 0);
+$ld = "ld" if ((length $ld) == 0);
+$nm = "nm" if ((length $nm) == 0);
+$rm = "rm" if ((length $rm) == 0);
+$mv = "mv" if ((length $mv) == 0);
+
+#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
+#    "'$nm' '$rm' '$mv' '$inputfile'\n";
+
+my %locals;		# List of local (static) functions
+my %weak;		# List of weak functions
+my %convert;		# List of local functions used that needs conversion
+
+my $type;
+my $section_regex;	# Find the start of a section
+my $function_regex;	# Find the name of a function
+			#    (return offset and func name)
+my $mcount_regex;	# Find the call site to mcount (return offset)
+
+if ($arch eq "x86_64") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
+    $type = ".quad";
+
+    # force flags for this arch
+    $ld .= " -m elf_x86_64";
+    $objdump .= " -M x86-64";
+    $objcopy .= " -O elf64-x86-64";
+    $cc .= " -m64";
+
+} elsif ($arch eq "i386") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
+    $type = ".long";
+
+    # force flags for this arch
+    $ld .= " -m elf_i386";
+    $objdump .= " -M i386";
+    $objcopy .= " -O elf32-i386";
+    $cc .= " -m32";
+
+} else {
+    die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
+}
+
+my $text_found = 0;
+my $read_function = 0;
+my $opened = 0;
+my $mcount_section = "__mcount_loc";
+
+my $dirname;
+my $filename;
+my $prefix;
+my $ext;
+
+if ($inputfile =~ m,^(.*)/([^/]*)$,) {
+    $dirname = $1;
+    $filename = $2;
+} else {
+    $dirname = ".";
+    $filename = $inputfile;
+}
+
+if ($filename =~ m,^(.*)(\.\S),) {
+    $prefix = $1;
+    $ext = $2;
+} else {
+    $prefix = $filename;
+    $ext = "";
+}
+
+my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
+my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
+
+#
+# --globalize-symbols came out in 2.17, we must test the version
+# of objcopy, and if it is less than 2.17 or cannot be determined,
+# then we can not record local functions.
+my $use_locals = 1;
+my $local_warn_once = 0;
+my $found_version = 0;
+
+open (IN, "$objcopy --version |") || die "error running $objcopy";
+while (<IN>) {
+    if (/objcopy.*\s(\d+)\.(\d+)/) {
+	my $major = $1;
+	my $minor = $2;
+	$found_version = 1;
+	if ($major < 2 ||
+	    ($major == 2 && $minor < 17)) {
+	    $use_locals = 0;
+	}
+	last;
+    }
+}
+close (IN);
+
+if (!$found_version) {
+    $use_locals = 0;	# actually disable locals, as the warning states
+    print STDERR "WARNING: could not find objcopy version.\n" .
+	"\tDisabling local function references.\n";
+}
+
+
+#
+# Step 1: find all the local (static functions) and weak symbols.
+#        't' is local, 'w/W' is weak (we never use a weak function)
+#
+open (IN, "$nm $inputfile|") || die "error running $nm";
+while (<IN>) {
+    if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
+	$locals{$1} = 1;
+    } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
+	$weak{$2} = $1;
+    }
+}
+close(IN);
+
+my @offsets;		# Array of offsets of mcount callers
+my $ref_func;		# reference function to use for offsets
+my $offset = 0;		# offset of ref_func to section beginning
+
+##
+# update_funcs - print out the current mcount callers
+#
+#  Go through the list of offsets to callers and write them to
+#  the output file in a format that can be read by an assembler.
+#
+sub update_funcs
+{
+    return if ($#offsets < 0);
+
+    defined($ref_func) || die "No function to reference";
+
+    # The only function available to represent this section is weak.
+    # Unfortunately, a weak function may be overwritten by another
+    # function of the same name, making all these offsets incorrect.
+    # To be safe, we simply print a warning and bail.
+    if (defined $weak{$ref_func}) {
+	print STDERR
+	    "$inputfile: WARNING: referencing weak function" .
+	    " $ref_func for mcount\n";
+	return;
+    }
+
+    # is this function static? If so, note this fact.
+    if (defined $locals{$ref_func}) {
+
+	# only use locals if objcopy supports globalize-symbols
+	if (!$use_locals) {
+	    return;
+	}
+	$convert{$ref_func} = 1;
+    }
+
+    # Loop through all the mcount caller offsets and print a reference
+    # to the caller based from the ref_func.
+    for (my $i=0; $i <= $#offsets; $i++) {
+	if (!$opened) {
+	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+	    $opened = 1;
+	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+	}
+	printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
+    }
+}
+
+#
+# Step 2: find the sections and mcount call sites
+#
+open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
+
+my $text;
+
+while (<IN>) {
+    # is it a section?
+    if (/$section_regex/) {
+	$read_function = 1;
+	# print out any recorded offsets
+	update_funcs() if ($text_found);
+
+	# reset all markers and arrays
+	$text_found = 0;
+	undef($ref_func);
+	undef(@offsets);
+
+    # section found, now is this a start of a function?
+    } elsif ($read_function && /$function_regex/) {
+	$text_found = 1;
+	$offset = hex $1;
+	$text = $2;
+
+	# if this is either a local function or a weak function
+	# keep looking for functions that are global that
+	# we can use safely.
+	if (!defined($locals{$text}) && !defined($weak{$text})) {
+	    $ref_func = $text;
+	    $read_function = 0;
+	} else {
+	    # if we already have a function, and this is weak, skip it
+	    if (!defined($ref_func) || !defined($weak{$text})) {
+		$ref_func = $text;
+	    }
+	}
+    }
+
+    # is this a call site to mcount? If so, record it to print later
+    if ($text_found && /$mcount_regex/) {
+	$offsets[$#offsets + 1] = hex $1;
+    }
+}
+
+# dump out any more offsets that may have been found
+update_funcs() if ($text_found);
+
+# If we did not find any mcount callers, we are done (do nothing).
+if (!$opened) {
+    exit(0);
+}
+
+close(FILE);
+
+#
+# Step 3: Compile the file that holds the list of call sites to mcount.
+#
+`$cc -o $mcount_o -c $mcount_s`;
+
+my @converts = keys %convert;
+
+#
+# Step 4: Do we have sections that started with local functions?
+#
+if ($#converts >= 0) {
+    my $globallist = "";
+    my $locallist = "";
+
+    foreach my $con (@converts) {
+	$globallist .= " --globalize-symbol $con";
+	$locallist .= " --localize-symbol $con";
+    }
+
+    my $globalobj = $dirname . "/.tmp_gl_" . $filename;
+    my $globalmix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 5: set up each local function as a global
+    #
+    `$objcopy $globallist $inputfile $globalobj`;
+
+    #
+    # Step 6: Link the global version to our list.
+    #
+    `$ld -r $globalobj $mcount_o -o $globalmix`;
+
+    #
+    # Step 7: Convert the local functions back into local symbols
+    #
+    `$objcopy $locallist $globalmix $inputfile`;
+
+    # Remove the temp files
+    `$rm $globalobj $globalmix`;
+
+} else {
+
+    my $mix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 8: Link the object with our list of call sites object.
+    #
+    `$ld -r $inputfile $mcount_o -o $mix`;
+
+    #
+    # Step 9: Move the result back to the original object.
+    #
+    `$mv $mix $inputfile`;
+}
+
+# Clean up the temp files
+`$rm $mcount_o $mcount_s`;
+
+exit(0);
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 46f23971f7e..5ba78701adc 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -1,5 +1,5 @@
 /*
- * dev_cgroup.c - device cgroup subsystem
+ * device_cgroup.c - device cgroup subsystem
  *
  * Copyright 2007 IBM Corp
  */
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/rcupdate.h>
 
 #define ACC_MKNOD 1
 #define ACC_READ  2
@@ -22,18 +23,8 @@
 
 /*
  * whitelist locking rules:
- * cgroup_lock() cannot be taken under dev_cgroup->lock.
- * dev_cgroup->lock can be taken with or without cgroup_lock().
- *
- * modifications always require cgroup_lock
- * modifications to a list which is visible require the
- *   dev_cgroup->lock *and* cgroup_lock()
- * walking the list requires dev_cgroup->lock or cgroup_lock().
- *
- * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
- *   a mutex, which the cgroup_lock() is.  Since modifying
- *   a visible list requires both locks, either lock can be
- *   taken for walking the list.
+ * hold cgroup_lock() for update/read.
+ * hold rcu_read_lock() for read.
  */
 
 struct dev_whitelist_item {
@@ -47,7 +38,6 @@ struct dev_whitelist_item {
 struct dev_cgroup {
 	struct cgroup_subsys_state css;
 	struct list_head whitelist;
-	spinlock_t lock;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
@@ -84,13 +74,9 @@ static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
 	struct dev_whitelist_item *wh, *tmp, *new;
 
 	list_for_each_entry(wh, orig, list) {
-		new = kmalloc(sizeof(*wh), GFP_KERNEL);
+		new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 		if (!new)
 			goto free_and_exit;
-		new->major = wh->major;
-		new->minor = wh->minor;
-		new->type = wh->type;
-		new->access = wh->access;
 		list_add_tail(&new->list, dest);
 	}
 
@@ -107,19 +93,16 @@ free_and_exit:
 /* Stupid prototype - don't bother combining existing entries */
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 			struct dev_whitelist_item *wh)
 {
 	struct dev_whitelist_item *whcopy, *walk;
 
-	whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
+	whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 	if (!whcopy)
 		return -ENOMEM;
 
-	memcpy(whcopy, wh, sizeof(*whcopy));
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
 		if (walk->type != wh->type)
 			continue;
@@ -135,7 +118,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 
 	if (whcopy != NULL)
 		list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
-	spin_unlock(&dev_cgroup->lock);
 	return 0;
 }
 
@@ -149,14 +131,12 @@ static void whitelist_item_free(struct rcu_head *rcu)
 
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 			struct dev_whitelist_item *wh)
 {
 	struct dev_whitelist_item *walk, *tmp;
 
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
 		if (walk->type == DEV_ALL)
 			goto remove;
@@ -174,7 +154,6 @@ remove:
 			call_rcu(&walk->rcu, whitelist_item_free);
 		}
 	}
-	spin_unlock(&dev_cgroup->lock);
 }
 
 /*
@@ -214,7 +193,6 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 		}
 	}
 
-	spin_lock_init(&dev_cgroup->lock);
 	return &dev_cgroup->css;
 }
 
@@ -330,15 +308,11 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 {
 	struct cgroup *pcg = childcg->css.cgroup->parent;
 	struct dev_cgroup *parent;
-	int ret;
 
 	if (!pcg)
 		return 1;
 	parent = cgroup_to_devcgroup(pcg);
-	spin_lock(&parent->lock);
-	ret = may_access_whitelist(parent, wh);
-	spin_unlock(&parent->lock);
-	return ret;
+	return may_access_whitelist(parent, wh);
 }
 
 /*
@@ -357,17 +331,14 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 				   int filetype, const char *buffer)
 {
-	struct dev_cgroup *cur_devcgroup;
 	const char *b;
 	char *endp;
-	int retval = 0, count;
+	int count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	cur_devcgroup = task_devcgroup(current);
-
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
 
@@ -437,7 +408,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	}
 
 handle:
-	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
 		if (!parent_has_perm(devcgroup, &wh))
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 576e5119907..3e3fde7c1d2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
 #include <linux/string.h>
 #include <linux/selinux.h>
 #include <linux/mutex.h>
+#include <linux/posix-timers.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2322,13 +2323,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 			initrlim = init_task.signal->rlim+i;
 			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
 		}
-		if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-			/*
-			 * This will cause RLIMIT_CPU calculations
-			 * to be refigured.
-			 */
-			current->it_prof_expires = jiffies_to_cputime(1);
-		}
+		update_rlimit_cpu(rlim->rlim_cur);
 	}
 
 	/* Wake up the parent if it is waiting so that it can
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index 89b7f549beb..ea2bf82c937 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(snd_pcm_format_physical_width);
 /**
  * snd_pcm_format_size - return the byte size of samples on the given format
  * @format: the format to check
+ * @samples: the number of samples
  *
  * Returns the byte size of the given samples for the format, or a
  * negative error code if unknown format.
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index e5e749f3e0e..73be7e14a60 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -51,7 +51,7 @@ static int emu10k1_playback_constraints(struct snd_pcm_runtime *runtime)
 	if (err < 0)
 		return err;
 	err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 256, UINT_MAX);
-	if (err) < 0)
+	if (err < 0)
 		return err;
 	return 0;
 }
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index b493660deb3..e5d42399491 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -26,7 +26,7 @@
 #include <asm/cpu/dac.h>
 #include <asm/cpu/timer.h>
 #include <asm/machvec.h>
-#include <asm/hp6xx.h>
+#include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define MODNAME "sh_dac_audio"
diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index a7d89662acf..88fbf285d2b 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -759,7 +759,6 @@ static int snd_ca0106_pcm_prepare_playback(struct snd_pcm_substream *substream)
 			       SPCS_CHANNELNUM_LEFT | SPCS_SOURCENUM_UNSPEC |
 			       SPCS_GENERATIONSTATUS | 0x00001200 |
 			       0x00000000 | SPCS_EMPHASIS_NONE | SPCS_COPYRIGHT );
-	}
 #endif
 
 	return 0;
diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index 20d0e328288..8f9e3859c37 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -666,6 +666,7 @@ static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
 	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
 	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
 	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
 	ret = snd_ps3_change_avsetting(card);
 
@@ -685,6 +686,7 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
 {
 	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
 	struct snd_ps3_avsetting_info avs;
+	int ret;
 
 	avs = card->avs;
 
@@ -729,19 +731,92 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
 		return 1;
 	}
 
-	if ((card->avs.avs_audio_width != avs.avs_audio_width) ||
-	    (card->avs.avs_audio_rate != avs.avs_audio_rate)) {
-		card->avs = avs;
-		snd_ps3_change_avsetting(card);
+	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
+	if (memcmp(&card->avs, &avs, sizeof(avs))) {
 		pr_debug("%s: after freq=%d width=%d\n", __func__,
 			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
 
-		return 0;
+		card->avs = avs;
+		snd_ps3_change_avsetting(card);
+		ret = 0;
 	} else
+		ret = 1;
+
+	/* check CS non-audio bit and mute accordingly */
+	if (avs.avs_cs_info[0] & 0x02)
+		ps3av_audio_mute_analog(1); /* mute if non-audio */
+	else
+		ps3av_audio_mute_analog(0);
+
+	return ret;
+}
+
+/*
+ * SPDIF status bits controls
+ */
+static int snd_ps3_spdif_mask_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+	uinfo->count = 1;
+	return 0;
+}
+
+/* FIXME: ps3av_set_audio_mode() assumes only consumer mode */
+static int snd_ps3_spdif_cmask_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	memset(ucontrol->value.iec958.status, 0xff, 8);
+	return 0;
+}
+
+static int snd_ps3_spdif_pmask_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	return 0;
+}
+
+static int snd_ps3_spdif_default_get(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	memcpy(ucontrol->value.iec958.status, ps3av_mode_cs_info, 8);
+	return 0;
+}
+
+static int snd_ps3_spdif_default_put(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	if (memcmp(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8)) {
+		memcpy(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8);
 		return 1;
+	}
+	return 0;
 }
 
+static struct snd_kcontrol_new spdif_ctls[] = {
+	{
+		.access = SNDRV_CTL_ELEM_ACCESS_READ,
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_cmask_get,
+	},
+	{
+		.access = SNDRV_CTL_ELEM_ACCESS_READ,
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_pmask_get,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_default_get,
+		.put = snd_ps3_spdif_default_put,
+	},
+};
 
 
 static int snd_ps3_map_mmio(void)
@@ -842,7 +917,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 
 static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
-	int ret;
+	int i, ret;
 	u64 lpar_addr, lpar_size;
 
 	BUG_ON(!firmware_has_feature(FW_FEATURE_PS3_LV1));
@@ -903,6 +978,15 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 	strcpy(the_card.card->driver, "PS3");
 	strcpy(the_card.card->shortname, "PS3");
 	strcpy(the_card.card->longname, "PS3 sound");
+
+	/* create control elements */
+	for (i = 0; i < ARRAY_SIZE(spdif_ctls); i++) {
+		ret = snd_ctl_add(the_card.card,
+				  snd_ctl_new1(&spdif_ctls[i], &the_card));
+		if (ret < 0)
+			goto clean_card;
+	}
+
 	/* create PCM devices instance */
 	/* NOTE:this driver works assuming pcm:substream = 1:1 */
 	ret = snd_pcm_new(the_card.card,
diff --git a/sound/ppc/snd_ps3.h b/sound/ppc/snd_ps3.h
index 4b7e6fbbe50..326fb29e82d 100644
--- a/sound/ppc/snd_ps3.h
+++ b/sound/ppc/snd_ps3.h
@@ -51,6 +51,7 @@ struct snd_ps3_avsetting_info {
 	uint32_t avs_audio_width;
 	uint32_t avs_audio_format; /* fixed */
 	uint32_t avs_audio_source; /* fixed */
+	unsigned char avs_cs_info[8];
 };
 /*
  * PS3 audio 'card' instance
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 0a063a98a66..853b33ae343 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -43,6 +43,7 @@
 struct omap_mcbsp_data {
 	unsigned int			bus_id;
 	struct omap_mcbsp_reg_cfg	regs;
+	unsigned int			fmt;
 	/*
 	 * Flags indicating is the bus already activated and configured by
 	 * another substream
@@ -200,6 +201,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
 	int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
+	int wlen;
 	unsigned long port;
 
 	if (cpu_class_is_omap1()) {
@@ -244,19 +246,29 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	switch (params_format(params)) {
 	case SNDRV_PCM_FORMAT_S16_LE:
 		/* Set word lengths */
+		wlen = 16;
 		regs->rcr2	|= RWDLEN2(OMAP_MCBSP_WORD_16);
 		regs->rcr1	|= RWDLEN1(OMAP_MCBSP_WORD_16);
 		regs->xcr2	|= XWDLEN2(OMAP_MCBSP_WORD_16);
 		regs->xcr1	|= XWDLEN1(OMAP_MCBSP_WORD_16);
-		/* Set FS period and length in terms of bit clock periods */
-		regs->srgr2	|= FPER(16 * 2 - 1);
-		regs->srgr1	|= FWID(16 - 1);
 		break;
 	default:
 		/* Unsupported PCM format */
 		return -EINVAL;
 	}
 
+	/* Set FS period and length in terms of bit clock periods */
+	switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr1	|= FWID(wlen - 1);
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr1	|= FWID(0);
+		break;
+	}
+
 	omap_mcbsp_config(bus_id, &mcbsp_data->regs);
 	mcbsp_data->configured = 1;
 
@@ -272,10 +284,12 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 {
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
+	unsigned int temp_fmt = fmt;
 
 	if (mcbsp_data->configured)
 		return 0;
 
+	mcbsp_data->fmt = fmt;
 	memset(regs, 0, sizeof(*regs));
 	/* Generic McBSP register settings */
 	regs->spcr2	|= XINTM(3) | FREE;
@@ -293,6 +307,8 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		/* 0-bit data delay */
 		regs->rcr2      |= RDATDLY(0);
 		regs->xcr2      |= XDATDLY(0);
+		/* Invert bit clock and FS polarity configuration for DSP_A */
+		temp_fmt ^= SND_SOC_DAIFMT_IB_IF;
 		break;
 	default:
 		/* Unsupported data format */
@@ -316,7 +332,7 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	}
 
 	/* Set bit clock (CLKX/CLKR) and FS polarities */
-	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	switch (temp_fmt & SND_SOC_DAIFMT_INV_MASK) {
 	case SND_SOC_DAIFMT_NB_NF:
 		/*
 		 * Normal BCLK + FS.